# Record script start time for the runtime report at the end of the analysis.
t_start = Sys.time()
# CRAN packages:
# NOTE: load order matters. MASS is attached after tidyverse, so MASS::select
# masks dplyr::select -- downstream code therefore calls dplyr::select()
# with an explicit namespace throughout.
library(tidyverse)
library(sf)
library(lubridate)
library(tidycensus)
library(ggExtra)
library(ggridges)
library(ggsn)
library(ragg)
library(rstan)
library(drc)
library(spdep)
library(broom)
library(MASS)
library(spatialreg)
library(here)
library(pdftools)
library(matrixStats)
library(egg)
library(ggpubr)
library(scales)
library(qs)
library(corrplot)
library(readxl)
library(splines)
library(magic)
library(httr)
library(jsonlite)
library(DHARMa)
library(kableExtra)
library(lwgeom)
# Github packages available via:
# remotes::install_github("justlab/Just_universal")
# remotes::install_github("justlab/MTA_turnstile")
library(Just.universal)
#### SESSION CONFIGURATION ####
# Silence dplyr's "summarise() has grouped output" messages.
options(dplyr.summarise.inform=FALSE)
# data will default to a subfolder "data/" within working directory
# unless 1. set by an environment variable:
data.root = Sys.getenv("COVID_DATA")
# or 2. set with an alternative path here:
if (data.root == "") data.root = here("data")
if (!dir.exists(data.root)) dir.create(data.root)
print(paste("data being downloaded into directory", dQuote(data.root)))
## [1] "data being downloaded into directory \"/data/scratch/COVID_runtime/data\""
# Get some data from the git repository rather than downloading from original
# source, to avoid changes in model results due to updated data
use_repo_data = TRUE
if(Sys.getenv("MTA_TURNSTILE_DATA_DIR") == ""){
# set up default download location for MTA turnstile data
mta_dir = file.path(data.root, "mta_turnstile")
if(!dir.exists(mta_dir)) dir.create(mta_dir, recursive = TRUE)
Sys.setenv(MTA_TURNSTILE_DATA_DIR = mta_dir)
}
library(MTA.turnstile)
## MTA.turnstile data directory: /data/scratch/COVID_runtime/data/mta_turnstile
t_turnstile_1 = Sys.time()
# output path for figures
fig.path = here("figures")
if(!dir.exists(fig.path)) dir.create(fig.path)
export.figs = TRUE
if(export.figs) message("Saving figures to:\n ", fig.path) else message("Not saving figures")
## Saving figures to:
## /data/scratch/COVID_runtime/figures
# To generate census data, you need an API key, which you can request here: https://api.census.gov/data/key_signup.html
#census_api_key("INSERT YOUR CENSUS API KEY HERE", install = TRUE)
if(Sys.getenv("CENSUS_API_KEY")=="") warning("Census API Key Missing")
# pairmemo function cache
pairmemo.dir = file.path(data.root, "pairmemo")
# Use TRUE/FALSE rather than the reassignable shorthand F.
dir.create(pairmemo.dir, showWarnings = FALSE)
# pm() wraps pairmemo() with the cache directory preset; n.frame = 2 makes
# pairmemo memoize the function defined at the caller's frame.
pm = function(...) pairmemo(
directory = pairmemo.dir,
n.frame = 2,
...)
pairmemo caches function results on disk, keyed by their input parameters.
If you run this script and then change the code, we recommend clearing the pairmemo cache for any function that runs after the point of your change before re-running the script.
Each of the following commented lines clears the pairmemo cache for one function. If you have not changed the input Census or Pluto data, you will likely only need to run the last one.
#pairmemo.clear(get.Pluto)
#pairmemo.clear(get.qn.blocks)
#pairmemo.clear(acs.main)
#pairmemo.clear(get_essential_acs)
#pairmemo.clear(get.tract.res)
#pairmemo.clear(fit_BWQS_model)
#### Functions ####
read_w_filenames <- function(flnm) {
read_csv(flnm) %>%
mutate(filename = flnm)
}
# Compute WAIC and (regularized importance-sampling) LOO summaries from a
# fitted rstan model whose generated quantities include a "log_lik" matrix
# (S posterior draws x n observations). Adapted from the classic waic()
# helper circulated with Vehtari & Gelman's Stan examples.
# Returns a list of totals: waic, elpd_waic, p_waic, elpd_loo, p_loo.
extract_waic <- function (stanfit){
log_lik <- rstan::extract(stanfit, "log_lik")$log_lik
# Coerce a 1-D draw vector (single observation) into an S x 1 matrix;
# otherwise flatten any trailing dimensions into columns (one per obs).
dim(log_lik) <- if (length(dim(log_lik)) == 1)
c(length(log_lik), 1)
else c(dim(log_lik)[1], prod(dim(log_lik)[2:length(dim(log_lik))]))
S <- nrow(log_lik)
n <- ncol(log_lik)
# Pointwise log predictive density (log of mean likelihood over draws).
lpd <- log(colMeans(exp(log_lik)))
# WAIC effective-parameter penalty: posterior variance of the log-likelihood.
p_waic <- colVars(log_lik)
elpd_waic <- lpd - p_waic
waic <- -2 * elpd_waic
# Truncated importance weights for LOO; capped at sqrt(S) for stability.
loo_weights_raw <- 1/exp(log_lik - max(log_lik))
loo_weights_normalized <- loo_weights_raw/matrix(colMeans(loo_weights_raw),
nrow = S, ncol = n, byrow = TRUE)
loo_weights_regularized <- pmin(loo_weights_normalized, sqrt(S))
elpd_loo <- log(colMeans(exp(log_lik) * loo_weights_regularized)/colMeans(loo_weights_regularized))
p_loo <- lpd - elpd_loo
pointwise <- cbind(waic, lpd, p_waic, elpd_waic, p_loo, elpd_loo)
total <- colSums(pointwise)
# NOTE(review): `se` is computed but never returned -- dead code retained to
# match the original helper; remove or return it if standard errors are needed.
se <- sqrt(n * colVars(pointwise))
return(list(waic = total["waic"], elpd_waic = total["elpd_waic"],
p_waic = total["p_waic"], elpd_loo = total["elpd_loo"],
p_loo = total["p_loo"]))
}
# Download a file (recording its metadata via download.update.meta, which
# stores records in data/downloads/meta.sqlite by default), then load the
# local copy with the supplied reader function `f`.
# Extra arguments in `...` are forwarded to `f`.
download = function(url, to, f, ...){
  local.path = download.update.meta(url, file.path(data.root, "downloads"), to)
  f(local.path, ...)
}
#### Load Data ####
Progress bars are a single line when run interactively, but print every refresh with knitr
# Subway ridership data
# Relative 2020 subway usage from MTA turnstile counts, aggregated to UHF
# neighborhoods ("nhood"); reads from the MTA_TURNSTILE_DATA_DIR cache.
Subway_ridership_by_UHF <- relative.subway.usage(2020L, "nhood")
## Reading turnstile files
## Processing
## Parsing timestamps
## Sorting
## Setting station names
## Dropping 3 unlocated stations
## Dropping 278,230 observations
## Decumulating - entries
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
[manually edited for length]
|
|======================================================================| 99%
|
|======================================================================| 100%
## Dropped 189 sources of 5,292
## Dropped 1,102,556 observations of 60,962,516
## Decumulating - exits
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
[manually edited for length]
|
|======================================================================| 99%
|
|======================================================================| 100%
## Dropped 173 sources of 5,292
## Dropped 868,952 observations of 60,962,516
t_turnstile_2 = Sys.time()
# get the Pluto dataset from #https://www1.nyc.gov/site/planning/data-maps/open-data/dwn-pluto-mappluto.page
# Memoized (pairmemo, fst storage) download of PLUTO 20v3; reads the CSV
# straight out of the zip and keeps only the columns used downstream
# (land use, BBL, floors, units, zip code).
pm(fst = T,
get.Pluto <- function() download(
"https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyc_pluto_20v3_csv.zip",
"pluto.zip",
function(p)
read_csv(unz(p, "pluto_20v3.csv"), col_types = cols(spdist2 = col_character(),
overlay2 = col_character(),
zonedist4 = col_character()))[,
c("landuse", "bbl", "numfloors", "unitstotal", "unitsres",
"zipcode")]))
Pluto <- as.data.frame(get.Pluto())
# If you get an error in running this function, the download may be incomplete/corrupt
# In that case, delete the pluto.zip file and try again:
#unlink(file.path(data.root, "downloads", "pluto.zip"))
# Building footprints: use a local qs cache if present, otherwise download the
# shapefile export from NYC Open Data and cache it for subsequent runs.
if (file.exists(file.path(data.root, "Bldg_Footprints.qs"))) {
Bldg_Footprints <- qread(file.path(data.root, "Bldg_Footprints.qs"))
} else {
Bldg_Footprints <- download(
# https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh
"https://data.cityofnewyork.us/api/geospatial/nqwf-w8eh?method=export&format=Shapefile",
"building_footprints.zip",
function(p)
# /vsizip/ lets GDAL read the shapefile without unzipping;
# st_read picks the first layer (a warning notes several are present).
st_read(paste0("/vsizip/", p)))
qsave(Bldg_Footprints, file.path(data.root, "Bldg_Footprints.qs"))
}
## Multiple layers are present in data source /vsizip//data/scratch/COVID_runtime/data/downloads/building_footprints.zip, reading layer `geo_export_aad3a557-b481-4681-b754-34463d92a741'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `geo_export_aad3a557-b481-4681-b754-34463d92a741' from data source `/vsizip//data/scratch/COVID_runtime/data/downloads/building_footprints.zip' using driver `ESRI Shapefile'
## Simple feature collection with 1084927 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -74.2555 ymin: 40.49843 xmax: -73.70006 ymax: 40.91505
## CRS: 4326
# Scrape borough -> zip-code assignments from the NY State cancer registry's
# neighborhoods page. XML::readHTMLTable misparses the columns, so the borough
# cells (headers="header1") and the text up to the next borough cell are
# captured with a regex instead, then all 5-digit zips are pulled from each span.
ZCTA_by_boro <- download(
"https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm",
"uhf_neighborhoods.html",
function(p)
{# XML::readHTMLTable doesn't identify the columns correctly.
x = str_match_all(read_file(p), regex(dotall = T, paste0(
'<td headers="header1"[^>]+>\\s*(.+?)</td>',
'(.+?)',
'(?=<td headers="header1"|</table>)')))[[1]]
# One data.frame row per (boro, zip) pair; column 2 = boro, column 3 = the
# HTML chunk containing that boro's zip codes.
do.call(rbind, lapply(1 : nrow(x), function(i)
data.frame(boro = x[i, 2], zip = as.integer(
str_extract_all(x[i, 3], "\\b\\d{5}\\b")[[1]]))))})
# Download the specific day of test results by ZCTA being used
# (loader is `identity`, so this is the local file path, not parsed data)
ZCTA_test_download <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/6d7c4a94d6472a9ffc061166d099a4e5d89cd3e3/tests-by-zcta.csv",
"2020-05-07_tests-by-zcta.csv",
identity
)
# Download COVID-19 testing data
# Read the downloaded file(s), tagging rows with the source filename so the
# snapshot date can be parsed out of it; drop the helper column afterwards.
ZCTA_test_series <- ZCTA_test_download %>%
map_df(~read_w_filenames(.)) %>%
mutate(date = as.Date(str_extract(filename, "[:digit:]{4}-[:digit:]{2}-[:digit:]{2}"))) %>%
dplyr::select(-filename)
## Parsed with column specification:
## cols(
## MODZCTA = col_double(),
## Positive = col_double(),
## Total = col_double(),
## zcta_cum.perc_pos = col_double()
## )
# UHF definitions by zip codes
# Parse page 2 of the UHF reference PDF: each match is (code, name, zips),
# then expand to one row per (code, name, zip).
# Fix: the original line read `UHF_ZipCodes <- UHF_ZipCodes <- download(` --
# a duplicated assignment (harmless but clearly a typo), reduced to one.
UHF_ZipCodes <- download(
"http://www.infoshare.org/misc/UHF.pdf",
"uhf_zips.pdf",
function(p)
{x = str_match_all(pdf_text(p)[2],
"(\\d+)\\s+(\\S.+?\\S)\\s*([0-9,]+)")[[1]]
do.call(rbind, lapply(1 : nrow(x), function(i)
data.frame(code = x[i, 2], name = x[i, 3], zip = as.integer(
str_extract_all(x[i, 4], "\\b\\d{5}\\b")[[1]]))))})
# UHF shapefile
UHF_shp <- download(
"https://www1.nyc.gov/assets/doh/downloads/zip/uhf42_dohmh_2009.zip",
"nyc_uhf_nhoods_shapefile.zip",
function(p) read_sf(paste0("/vsizip/", p, "/UHF_42_DOHMH_2009")))
# NYC boroughs from NYC Open Data
NYC_basemap_shp <- download(
"https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile",
"Borough_Boundaries.zip",
function(p){
unzip(p, exdir = file.path(data.root, "downloads"))
# open data platform generates a random UUID for every download
# NOTE(review): this glob assumes exactly one geo_export_*.shp in the
# downloads folder; a second Open Data shapefile export there would break it.
ufile = list.files(file.path(data.root, "downloads"), pattern = "geo_export_.*\\.shp", full.names = TRUE)
# Reproject to NY-Long Island State Plane (EPSG:2263), used by all maps below.
st_read(ufile, stringsAsFactors = FALSE, quiet = TRUE) %>% st_transform(., crs = 2263)
}
)
# DOHMH MODZCTA Shapefile
MODZCTA_NYC_shp <- download(
"https://data.cityofnewyork.us/api/geospatial/pri4-ifjk?method=export&format=Shapefile",
"Modified Zip Code Tabulation Areas (MODZCTA).zip",
function(p) read_sf(paste0("/vsizip/", p))
)
# Food outlets
# Use the repo's frozen 2019 snapshot by default (reproducibility); otherwise
# pull the live retail food stores dataset from data.ny.gov.
if(use_repo_data){
food_retail <- read_csv(here("data", "retail_food_stores_2019-06-13.csv"))
} else {
food_retail <- download(
"https://data.ny.gov/api/views/9a8c-vfzj/rows.csv",
"retail_food_stores.csv",
read_csv)
}
## Parsed with column specification:
## cols(
## County = col_character(),
## `License Number` = col_character(),
## `Operation Type` = col_character(),
## `Establishment Type` = col_character(),
## `Entity Name` = col_character(),
## `DBA Name` = col_character(),
## `Street Number` = col_character(),
## `Street Name` = col_character(),
## `Address Line 2` = col_logical(),
## `Address Line 3` = col_logical(),
## City = col_character(),
## State = col_character(),
## `Zip Code` = col_double(),
## `Square Footage` = col_double(),
## Location = col_character()
## )
# Download deaths by ZCTA as of May 23rd
# Pinned to a specific commit of the nychealth repo so results don't shift
# as the data file is updated.
deaths_by23May2020_by_zcta <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/8d88b2c06cf6b65676d58b28979731faa10c193c/data-by-modzcta.csv",
"2020-05-23_data-by-modzcta.csv",
read_csv
)
## Parsed with column specification:
## cols(
## MODIFIED_ZCTA = col_double(),
## NEIGHBORHOOD_NAME = col_character(),
## BOROUGH_GROUP = col_character(),
## COVID_CASE_COUNT = col_double(),
## COVID_CASE_RATE = col_double(),
## POP_DENOMINATOR = col_double(),
## COVID_DEATH_COUNT = col_double(),
## COVID_DEATH_RATE = col_double(),
## PERCENT_POSITIVE = col_double()
## )
# download MODZCTA to ZCTA crosswalk, current version from repo
modzcta_to_zcta <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/ZCTA-to-MODZCTA.csv",
"ZCTA-to-MODZCTA.csv",
read_csv
)
# Character-keyed variants for joining against string GEOID columns:
# _1 converts ZCTA only, _2 additionally converts MODZCTA.
modzcta_to_zcta1 <- modzcta_to_zcta %>% mutate(ZCTA = as.character(ZCTA))
modzcta_to_zcta2 <- modzcta_to_zcta1 %>% mutate(MODZCTA = as.character(MODZCTA))
# Reference vectors of the MODZCTAs / ZCTAs that appear in NYC data.
MODZCTAs_in_NYC <- as.character(unique(ZCTA_test_series$MODZCTA))
ZCTAs_in_NYC <- as.character(unique(modzcta_to_zcta$ZCTA))
# download ZIP to Tract crosswalk from HUD
# Keep only zip, tract, and the first ratio column (col_types skips the rest);
# warnings from extra header rows are suppressed.
zip_to_tract <- download(
"https://www.huduser.gov/portal/datasets/usps/ZIP_TRACT_062020.xlsx",
"ZIP_TRACT_062020.xlsx",
function(p) suppressWarnings(read_excel(path = p, col_types = c("text", "text", "numeric", "skip", "skip", "skip")))
)
# Block to ZCTA and County crosswalk for NY
# LODES crosswalk: keep county, 2010 block ID, ZCTA and block centroid
# coordinates; IDs are converted to character to match other tables' keys.
ny_xwalk <- download("https://lehd.ces.census.gov/data/lodes/LODES7/ny/ny_xwalk.csv.gz",
"ny_xwalk.csv.gz",
function(p) {
zzf = gzfile(p)
read.csv(zzf) %>%
dplyr::select(cty, tabblk2010, zcta, blklondd, blklatdd) %>%
mutate(tabblk2010 = as.character(tabblk2010),
zcta = as.character(zcta),
cntyfips = as.character(cty)) %>%
dplyr::select(-cty)
}
)
# We have many sources of data, so these just help to combine the various data types
NYC_counties1 <- c("Bronx","Kings","Queens","New York","Richmond")
NYC_counties1_full <- c("Bronx County","Kings County","Queens County","New York County","Richmond County")
# County <-> borough <-> FIPS lookup. Reuse the vectors defined just above
# (the original re-typed the same literals, which could drift out of sync).
NYC_boro_county_match <- tibble(County = NYC_counties1,
boro = c("Bronx","Brooklyn","Queens","Manhattan","Staten Island"),
full_county = NYC_counties1_full,
fips = c("36005", "36047", "36081", "36061", "36085"))
# stan model script
BWQS_stan_model <- here("code", "nb_bwqs_cov.stan")
#### Census Data ####
# function to pull 2010 block population for Queens & Nassau counties
# (memoized). Used later to pro-rate ZCTAs straddling the Queens/Nassau line.
pm(get.qn.blocks <- function(){
# P001001 = total population, 2010 decennial census, block level.
nassau_blk_pop <- get_decennial(geography = "block", variables = "P001001",
state = "NY", county = "Nassau", geometry = FALSE)
queens_blk_pop <- get_decennial(geography = "block", variables = "P001001",
state = "NY", county = "Queens", geometry = FALSE)
bind_rows(nassau_blk_pop, queens_blk_pop) %>%
dplyr::select(GEOID, value) %>% rename("pop2010" = "value")
})
# function to pull 2018 ACS data
# Memoized pull of 2014-2018 5-year ACS estimates at ZCTA or tract level,
# renaming census variable codes to descriptive names, then filtering to NYC
# and stripping margin-of-error columns / the "E" estimate suffix.
# NOTE(review): the defaults are documentation only -- c(NULL, "NY") collapses
# to "NY", and `admin_unit==...` on the 2-element default would not be a valid
# scalar condition; every call site passes both arguments explicitly.
pm(acs.main <- function(admin_unit = c("zcta", "tract"), state_unit = c(NULL, "NY"), sf_shapes = c(TRUE, FALSE)) {
ACS_Data <- get_acs(geography = admin_unit,
state = state_unit,
geometry = sf_shapes,
variables = c(medincome = "B19013_001",
total_pop1 = "B01003_001",
fpl_100 = "B06012_002",
fpl_100to150 = "B06012_003",
median_rent = "B25031_001",
total_hholds1 = "B22003_001",
hholds_snap = "B22003_002",
over16total_industry1 = "C24050_001",
ag_industry = "C24050_002",
construct_industry = "C24050_003",
manufact_industry = "C24050_004",
wholesaletrade_industry = "C24050_005",
retail_industry = "C24050_006",
transpo_and_utilities_industry = "C24050_007",
information_industry = "C24050_008",
finance_and_realestate_industry = "C24050_009",
science_mngmt_admin_industry = "C24050_010",
edu_health_socasst_industry = "C24050_011",
arts_entertain_rec_accomodate_industry = "C24050_012",
othsvcs_industry = "C24050_013",
publicadmin_industry = "C24050_014",
total_commute1 = "B08301_001",
drove_commute = "B08301_002",
pubtrans_bus_commute = "B08301_011",
pubtrans_subway_commute = "B08301_013",
# Fix: railroad previously duplicated the subway code "B08301_013";
# B08301_014 is "Long-distance train or commuter rail". This changes
# the railroad column (and _commute proportions) downstream.
pubtrans_railroad_commute = "B08301_014",
pubtrans_ferry_commute = "B08301_015",
taxi_commute = "B08301_016",
bicycle_commute = "B08301_018",
walked_commute = "B08301_019",
workhome_commute = "B08301_021",
unemployed = "B23025_005",
under19_noinsurance = "B27010_017",
age19_34_noinsurance = "B27010_033",
age35_64_noinsurance = "B27010_050",
age65plus_noinsurance = "B27010_066",
hisplat_raceethnic = "B03002_012",
nonhispLat_white_raceethnic = "B03002_003",
nonhispLat_black_raceethnic = "B03002_004",
nonhispLat_amerindian_raceethnic = "B03002_005",
nonhispLat_asian_raceethnic = "B03002_006",
age65_plus = "B08101_008"),
year = 2018,
output = "wide",
survey = "acs5")
if(admin_unit=="zcta"){
ACS_Data <- ACS_Data %>% #only pull out the estimates and cleaning variable names
filter(GEOID %in% ZCTAs_in_NYC) %>%
dplyr::select(-NAME) %>%
dplyr::select(GEOID, !ends_with("M")) %>%
rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2)))
}
if(admin_unit=="tract"){
ACS_Data <- ACS_Data %>% #only pull out the estimates and cleaning variable names
filter(substr(GEOID,1,5) %in% NYC_boro_county_match$fips) %>% # Tracts in NYC counties
dplyr::select(-NAME) %>%
dplyr::select(GEOID, !ends_with("M")) %>%
rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2)))
}
return(ACS_Data)
})
# Use 2010 block population to scale Nassau County ZCTA contribution to NYC MODZCTAs
# This is the method used according to data dictionary at NYC Open Data:
# https://data.cityofnewyork.us/Health/Modified-Zip-Code-Tabulation-Areas-MODZCTA-/pri4-ifjk
# Args: zcta_acs = wide ACS table keyed by GEOID; mz_to_z = MODZCTA->ZCTA
# crosswalk (character); blk_to_z = block->ZCTA crosswalk with county FIPS.
# Returns zcta_acs with count variables multiplied by each ZCTA's share of
# 2010 population inside NYC (medians left unscaled), plus an in_NYC_prop column.
nassau_zcta_weights <- function(zcta_acs, mz_to_z, blk_to_z){
blk_pop = get.qn.blocks()
modzcta_span = c("11429", "11411", "11004") # MODZCTA with ZCTAs from both Queens & Nassau
border_zcta <- mz_to_z %>% filter(MODZCTA %in% modzcta_span) %>% pull(ZCTA)
# Filter block-zcta crosswalk table to Queens-Nassau ZCTA of interest
blk_to_z <- blk_to_z %>% filter(zcta %in% border_zcta)
# Join population to block-zcta crosswalk
xwalk_pop <- blk_to_z %>% left_join(blk_pop, by = c("tabblk2010" = "GEOID"))
# Summarise 2010 population by ZCTA to calculate proportions inside NYC
zcta_pop_2010 <- xwalk_pop %>%
group_by(zcta) %>%
summarise(z_pop_2010 = sum(pop2010), .groups = "drop_last")
# Same, but restricted to blocks in Queens County (FIPS 36081) only.
queens_zcta_pop_2010 <- xwalk_pop %>%
filter(cntyfips == "36081") %>%
group_by(zcta) %>%
summarise(queens_z_pop_2010 = sum(pop2010), .groups = "drop_last")
zcta_pop_props <- queens_zcta_pop_2010 %>%
left_join(zcta_pop_2010, by = "zcta") %>%
mutate(in_NYC_prop = queens_z_pop_2010/z_pop_2010)
# ZCTAs not on the border get weight 1 (entirely inside NYC).
zcta_weights <- zcta_acs %>% dplyr::select(GEOID) %>%
left_join(zcta_pop_props, by = c("GEOID" = "zcta")) %>%
dplyr::select(GEOID, in_NYC_prop) %>%
mutate(in_NYC_prop =
case_when(is.na(in_NYC_prop) ~ 1,
TRUE ~ in_NYC_prop))
# Apply weights for all Census variables except median vars
zcta_acs <- zcta_acs %>% left_join(zcta_weights, by = "GEOID")
# varvec holds the column POSITIONS to scale (everything except ID, the
# weight itself, and the median variables, which cannot be pro-rated).
varvec <- 1:ncol(zcta_acs)
varvec <- varvec[-grep("GEOID|in_NYC_prop|medincome|median_rent", names(zcta_acs))]
# The assignment's value is the function's return value (last expression).
zcta_acs <- zcta_acs %>% mutate_at(vars(all_of(varvec)), ~ . * in_NYC_prop)
}
# function to clean ACS data
# For ZCTA input: aggregate ZCTA rows up to MODZCTA (population-weighted mean
# for medians, sums for counts), then derive percentage variables.
# For tract input: derive the same percentage variables per tract.
# Returns a table keyed by `zcta` (MODZCTA) or GEOID (tract).
clean_acs_data_and_derive_vars <- function(df, admin_unit = c("zcta", "tract")){
if(admin_unit=="zcta"){
# Median income / rent: population-weighted mean across member ZCTAs.
ACS_Data1a <- df %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(medincome, median_rent), ~weighted.mean(., total_pop1, na.rm = T)) %>%
rename(zcta = "MODZCTA")
# Count variables: sum across member ZCTAs, then convert to percentages.
ACS_Data1b <- df %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(total_pop1:fpl_100to150, total_hholds1:age65_plus), ~sum(.)) %>%
mutate_at(vars(ends_with("_commute")), ~round((./total_commute1)*100, 2)) %>% #proportion of people relying on a given mode of transit
mutate_at(vars(ends_with("_raceethnic")), ~round((./total_pop1)*100, 2)) %>% #proportion of ppl reporting a given race/ethncity
mutate(not_insured = round(((under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) / total_pop1)*100, 2), #proportion uninsured
#snap_hholds = round((hholds_snap/total_hholds1)*100, 2), #proportion relying on SNAP
#fpl_150 = round(((fpl_100+fpl_100to150)/total_pop1)*100, 2), #proportion 150% or less of FPL
unemployed = round((unemployed/over16total_industry1)*100, 2), #proportion unemployed
not_quarantined_jobs = round(((ag_industry+(construct_industry*.25)+wholesaletrade_industry+ #an estimate of who is still leaving the house for work based on industry
(edu_health_socasst_industry*.5)+transpo_and_utilities_industry)/over16total_industry1)*100, 2)) %>%
dplyr::select(-ends_with("_noinsurance"), -fpl_100, -fpl_100to150, -ends_with("_industry"), -hholds_snap) %>%
rename(zcta = "MODZCTA")
ACS_Data2 <- left_join(ACS_Data1a, ACS_Data1b, by = "zcta") %>%
mutate(zcta = as.character(zcta))
} else{
# Tract branch: same derivations, no aggregation step.
ACS_Data2 <- df %>%
mutate_at(vars(ends_with("_commute")), ~round((./total_commute1)*100, 2)) %>% #proportion of people relying on a given mode of transit
mutate_at(vars(ends_with("_raceethnic")), ~round((./total_pop1)*100, 2)) %>% #proportion of ppl reporting a given race/ethncity
mutate(not_insured = round(((under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) / total_pop1)*100, 2), #proportion uninsured
unemployed = round((unemployed/over16total_industry1)*100, 2), #proportion unemployed
not_quarantined_jobs = round(((ag_industry+(construct_industry*.25)+wholesaletrade_industry+ #an estimate of who is still leaving the house for work based on industry
(edu_health_socasst_industry*.5)+transpo_and_utilities_industry)/over16total_industry1)*100, 2)) %>%
dplyr::select(-ends_with("_noinsurance"), -ends_with("_industry"),-fpl_100, -fpl_100to150,-hholds_snap)
}
return(ACS_Data2)
}
# Functions to pull mode of transportation for our approximate of essential workers
# Memoized (fst) pull of B08126 commute-by-industry variables (car alone vs
# public transit) for the industries treated as "essential" in this analysis.
pm(fst = T,
get_essential_acs <- function(admin_unit, state_unit) {
get_acs(geography = admin_unit, #pull down the relevant categories
state = state_unit,
variables = c(ag_car1_commute = "B08126_017",
ag_pubtrans_commute = "B08126_047",
construct_car1_commute ="B08126_018",
construct_pubtrans_commute = "B08126_048",
wholesale_car1_commute = "B08126_020",
wholesale_pubtrans_commute = "B08126_050",
transpo_car1_commute = "B08126_022",
transpo_pubtrans_commute = "B08126_052",
ed_hlthcare_car1_commute = "B08126_026",
ed_hlthcare_pubtrans_commute = "B08126_056"),
year = 2018,
output = "wide",
survey = "acs5")
})
# Estimate essential-worker commuters (drove alone vs public transit) per
# MODZCTA or tract from the B08126 pull above. Education/healthcare counts are
# halved and construction quartered to match the weights used for
# not_quarantined_jobs in clean_acs_data_and_derive_vars.
# Fix: the original wrote `~ round(. / 2), 0)` -- the digits argument sat
# OUTSIDE round() and was silently swallowed by mutate_at's `...`. Behavior is
# unchanged (round()'s digits defaults to 0) but the latent bug is corrected
# at all four sites.
acs.essential <- function(admin_unit, zcta_pop = NULL, state_unit = NULL) {
if(!admin_unit %in% c("zcta", "tract")) stop("admin_unit must be either 'zcta' or 'tract'")
if(admin_unit == "tract" & is.null(state_unit)) stop("state_unit must be set to download tracts")
if(!is.null(state_unit)) if(state_unit != "NY") stop("state_unit must be either NULL or 'NY'")
ACS_EssentialWrkr_Commute <- get_essential_acs(admin_unit = admin_unit, state_unit = state_unit)
if(admin_unit == "zcta"){
if(is.null(zcta_pop)) stop("zcta_pop must be set for scaling MODZCTA on Queens/Nassau boundary")
ACS_Essential_worker_estimates <- ACS_EssentialWrkr_Commute %>% #clean data and aggregate
dplyr::select(-ends_with("M"), -NAME) %>%
filter(GEOID %in% ZCTAs_in_NYC) %>%
# scale ZCTAs by proportion of population in NYC
right_join(zcta_pop %>% dplyr::select("GEOID", "in_NYC_prop"), by = "GEOID") %>%
# NOTE(review): vars(2:11) selects by column POSITION; it relies on the
# ten estimate columns sitting right after GEOID -- confirm if the pull
# above ever changes.
mutate_at(vars(2:11), ~ . * in_NYC_prop) %>%
# summarize ZCTA to MODZCTA
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(2:11),
~ sum(., na.rm = T)) %>%
rename(zcta = "MODZCTA") %>%
mutate_at(vars(starts_with("ed_hlthcare")), ~ round(. / 2, 0)) %>% #maintain same proportions as estimated non-quarantined jobs
mutate_at(vars(starts_with("construct")), ~ round(. / 4, 0)) %>%
mutate(
essentialworker_drove = rowSums(dplyr::select(., contains("car1_commute"))),
essentialworker_pubtrans = rowSums(dplyr::select(., contains("pubtrans")))) %>%
dplyr::select(zcta, essentialworker_drove, essentialworker_pubtrans) %>%
mutate(zcta = as.character(zcta))
}
else { # tracts
ACS_Essential_worker_estimates <- ACS_EssentialWrkr_Commute %>% #clean data and aggregate
dplyr::select(-ends_with("M"), -NAME) %>%
filter(substr(GEOID,1,5) %in% NYC_boro_county_match$fips) %>% # Tracts in NYC counties
mutate_at(vars(starts_with("ed_hlthcare")), ~round(./2, 0)) %>% #maintain same proportions as estimated non-quarantined jobs
mutate_at(vars(starts_with("construct")), ~round(./4, 0)) %>%
mutate(essentialworker_drove = rowSums(dplyr::select(., contains("car1_commute"))),
essentialworker_pubtrans = rowSums(dplyr::select(., contains("pubtrans")))) %>%
dplyr::select(GEOID, essentialworker_drove, essentialworker_pubtrans)
}
return(ACS_Essential_worker_estimates)
}
# ZCTA CENSUS DATA
# Cache TIGER shapefiles across runs.
options(tigris_use_cache = TRUE)
ACS_Data1 <- as.data.frame(acs.main("zcta", NULL, FALSE)) #download the zcta data
## Getting data from the 2014-2018 5-year ACS
# Pro-rate border ZCTAs by their share of population inside NYC.
ACS_Data_scaled <- nassau_zcta_weights(ACS_Data1, modzcta_to_zcta2, ny_xwalk)
## Getting data from the 2010 decennial Census
## Getting data from the 2010 decennial Census
ACS_Data2 <- clean_acs_data_and_derive_vars(ACS_Data_scaled, "zcta")
ACS_EssentialWrkr_Commute1 = as.data.frame(acs.essential("zcta", zcta_pop = ACS_Data_scaled, state_unit = NULL))
## Getting data from the 2014-2018 5-year ACS
print(paste("The 2018 5-year ACS population range in NYC MODZCTAs is:", paste(range(ACS_Data2$total_pop1), collapse = "-")))
## [1] "The 2018 5-year ACS population range in NYC MODZCTAs is: 3028-112425"
# TRACT CENSUS DATA
# Tract pull keeps geometry (sf_shapes = TRUE) for the spatial joins below.
acs_tracts <- acs.main("tract", "NY", TRUE)
## Getting data from the 2014-2018 5-year ACS
acs_tracts2 <- clean_acs_data_and_derive_vars(acs_tracts, "tract")
acs_tracts_commute1 = as.data.frame(acs.essential("tract", state_unit = "NY"))
## Getting data from the 2014-2018 5-year ACS
t_census = Sys.time()
#### Grocery stores per area ####
# Keyword blacklist used to exclude non-supermarket outlets by name.
non_supermarket_strings <- c("DELI|TOBACCO|GAS|CANDY|7 ELEVEN|7-ELEVEN|LIQUOR|ALCOHOL|BAKERY|CHOCOLATE|DUANE READE|WALGREENS|CVS|RITE AID|RAVIOLI|WINERY|WINE|BEER|CAFE|COFFEE")
# Keep NYC-county stores whose establishment type includes J, A and C but not
# H, that aren't name-matched to the blacklist, and that are >= 4500 sq ft.
# NOTE(review): str_detect returns NA for missing Entity/DBA names, and
# filter() drops NA rows -- stores with a missing name are silently excluded;
# confirm this is intended.
food_retail_filtered <- food_retail %>%
filter(County %in% NYC_boro_county_match$County) %>%
filter(str_detect(`Establishment Type`, "J") & str_detect(`Establishment Type`, "A") & str_detect(`Establishment Type`, "C") &
!str_detect(`Establishment Type`, "H")) %>%
filter(!str_detect(`Entity Name`, non_supermarket_strings) & !str_detect(`DBA Name`, non_supermarket_strings)) %>%
filter(`Square Footage`>=4500) %>%
mutate(zcta = as.character(str_extract(Location, "[:digit:]{5}"))) %>%
mutate(Address = case_when(
# some locations geocode better when address includes city name
`License Number` %in% c("638599", "712410", "706967", "710078") ~
paste(paste(`Street Number`, `Street Name`), City, State, zcta, sep = ","),
# but most geocode better without it, see limitation #4 at: http://gis.ny.gov/gisdata/inventories/details.cfm?DSID=1278
TRUE ~
paste(paste(`Street Number`, `Street Name`), State, zcta, sep = ",") )
)
# Geocode grocers, using a cached version if available to make analysis reproducible
# The geocoding service may be updated in the future and give different results
cached_grocers = file.path(data.root, "grocers_geocode_2020-11-09.csv")
if(file.exists(cached_grocers) & use_repo_data){
gctable <- read.csv(cached_grocers)
# Rows with score 0 are failed geocodes (no coordinates).
failed = which(gctable$score == 0)
message("Loaded cached geocoded grocers: ", nrow(gctable)-length(failed), "/", nrow(gctable), " have coordinates.") # 997/1037
if(nrow(gctable) != nrow(food_retail_filtered)) warning("Cached geocoded table has different row count than non-geocoded table")
} else {
# locations are returned in crs=26918, UTM 18N NAD83
api = "https://gisservices.its.ny.gov/arcgis/rest/services/Locators/Street_and_Address_Composite/GeocodeServer/findAddressCandidates?f=json&maxLocations=1&SingleLine="
message("Geocoding ", nrow(food_retail_filtered), " grocers with NY State ITS geocoder...")
t1 = Sys.time()
res = lapply(food_retail_filtered$Address, function(addr) {
GET(url = paste0(api, URLencode(addr))) })
tdiff = Sys.time() - t1
# extract results
geocodes = lapply(res, function(page) fromJSON(rawToChar(page$content), flatten = TRUE)$candidates)
# Robustness fix: use inherits() via vapply rather than comparing class()
# strings with sapply (class() can return a vector; sapply's type varies).
failed = which(!vapply(geocodes, inherits, logical(1), "data.frame"))
# Fix: the original used 1:length(failed); when nothing failed that is
# c(1, 0), producing a replacement-length error. seq_along() handles the
# empty case correctly (zero replacements).
geocodes[failed] <- lapply(seq_along(failed), function(void){
data.frame(address = NA_character_, score = 0, location.x = NA_real_, location.y = NA_real_)})
gctable = bind_rows(geocodes)
message("Geocoded ", nrow(food_retail_filtered)-length(failed), "/", nrow(food_retail_filtered),
" grocers in ", round(as.numeric(tdiff),1), " ", attributes(tdiff)$units)
write.csv(gctable, file.path(data.root, paste0("grocers_geocode_", Sys.Date(), ".csv")))
}
## Loaded cached geocoded grocers: 997/1037 have coordinates.
# Count grocers by tract
# Drop failed geocodes, build point geometries (UTM 18N), and reproject to
# the analysis CRS (EPSG:2263).
gctable = filter(gctable, score > 0)
grocerSF = st_as_sf(gctable, coords = c("location.x", "location.y"), crs = 26918) %>% st_transform(crs = 2263)
tractSF = acs_tracts2[, "GEOID"] %>% st_transform(., crs = 2263)
# Distinct geocoded addresses per tract (n_distinct de-duplicates stores
# that geocoded to the same address).
tract_grocers = suppressWarnings(st_intersection(tractSF, grocerSF)) %>%
st_set_geometry(., NULL) %>%
group_by(GEOID) %>%
summarise(grocers = n_distinct(`address`))
#nrow(tract_grocers) # 749
#sum(tract_grocers$grocers) # 993
# Count grocers by ZCTA
zctaSF <- MODZCTA_NYC_shp %>% dplyr::select(modzcta, geometry) %>% st_transform(crs = 2263)
zcta_grocers <- suppressWarnings(st_intersection(zctaSF, grocerSF)) %>%
st_set_geometry(., NULL) %>%
group_by(modzcta) %>%
summarise(grocers = n_distinct(`address`))
# sum(zcta_grocers$grocers) # 993
# nrow(zcta_grocers) # 172
# range(zcta_grocers$grocers) # 1, 21
#### Subway station locations ####
# Station points from the turnstile data, projected to EPSG:2263.
SubwayStation_shp <- as_tibble(turnstile()$stations) %>%
st_as_sf(., coords = c("lon", "lat"), crs = 4269) %>%
st_transform(., crs = 2263) %>%
filter(!str_detect(ca, "PTH")) #removing New Jersey PATH stations
#### Residential area ####
# Residential PLUTO lots: land-use codes 01-04 (one/two-family through
# mixed residential).
# NOTE(review): the comparison is against the strings "01"/"04"; if read_csv
# parsed landuse as numeric, the coercion to character makes this range check
# lexicographic ("4" <= "04" is FALSE) -- confirm landuse arrives zero-padded.
Pluto_ResOnly <- Pluto %>%
filter(landuse>="01" & landuse<="04") %>%
mutate(base_bbl = as.character(bbl)) %>%
dplyr::select(-bbl)
ResBBLs <- as.character(Pluto_ResOnly$base_bbl)
# zcta-level residential area
# Estimate per-lot residential volume: footprint area x roof height (or
# 10 ft per floor where height is missing), scaled by the residential share
# of units; sum to ZCTA.
Res_Bldg_Footprints <- Bldg_Footprints %>%
st_set_geometry(., NULL) %>%
mutate(base_bbl = as.character(base_bbl)) %>%
filter(base_bbl %in% ResBBLs &
feat_code == "2100") %>%
mutate(bldg_volume = shape_area * heightroof) %>%
left_join(., Pluto_ResOnly, by = "base_bbl") %>%
mutate(bldg_volume = if_else(is.na(bldg_volume), shape_area*numfloors*10, bldg_volume),
res_volume = (bldg_volume/unitstotal)*unitsres,
zcta = as.character(zipcode)) %>%
group_by(zcta) %>%
summarise(total_res_volume_zcta = sum(res_volume, na.rm = TRUE))
# tract-level residential area
# Same volume estimate as above, but footprints are reduced to centroids so
# each building is assigned to exactly one tract by st_intersection below.
# Fix: the original wrote `%>% suppressWarnings(st_centroid(of_largest_polygon
# = TRUE))`, which pipes the data in as suppressWarnings' `expr` and makes the
# st_centroid call the lazily-evaluated `classes` argument -- so the centroid
# step was never executed and full polygons flowed through. The braced block
# applies st_centroid to the piped value and suppresses its warnings.
Res_Bldg_Footprints2 <- Bldg_Footprints %>%
st_transform(crs = 2263) %>%
{suppressWarnings(st_centroid(., of_largest_polygon = TRUE))} %>%
mutate(base_bbl = as.character(base_bbl)) %>%
filter(base_bbl %in% ResBBLs &
feat_code == "2100") %>%
mutate(bldg_volume = shape_area * heightroof) %>%
left_join(., Pluto_ResOnly, by = "base_bbl") %>%
mutate(bldg_volume = if_else(is.na(bldg_volume), shape_area*numfloors*10, bldg_volume),
res_volume = (bldg_volume/unitstotal)*unitsres)
# Memoized spatial join of residential buildings to tracts.
pm(get.tract.res <- function(res, tracts) st_intersection(res, tracts)) # takes a few minutes
res_bldg_tract <- get.tract.res(Res_Bldg_Footprints2, tractSF)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
res_bldg_tract_sum <- st_set_geometry(res_bldg_tract, NULL) %>%
group_by(GEOID) %>%
summarise(total_res_volume_tract = sum(res_volume, na.rm = TRUE))
#nrow(res_bldg_tract_sum) # 2132
#### COVID Tests ####
# MODZCTA geography, keyed as "zcta" for the joins below
MODZCTA_NYC_shp1 <- MODZCTA_NYC_shp %>%
dplyr::select(modzcta, geometry) %>%
rename("zcta" = "modzcta")
# cumulative positives and total tests per MODZCTA on the study date
May7_tests <- ZCTA_test_series %>%
filter(date=="2020-05-07") %>%
mutate(zcta = as.character(MODZCTA)) %>%
rename(total_tests = "Total") %>%
dplyr::select(zcta, date, Positive, total_tests)
# ZCTA -> borough lookup; three ZCTAs missing a borough are patched by hand
ZCTA_by_boro1 <- ZCTA_by_boro %>%
mutate(boro = as.character(boro),
zcta = as.character(zip)) %>%
dplyr::select(-zip) %>%
bind_rows(.,
tibble(boro = as.character(c("Manhattan", "Manhattan" ,"Queens")), #correcting nas
zcta = as.character(c("10069", "10282", "11109"))))
# get water mask for maps (defines basemap_water and plot_bounds used below
# -- presumably; confirm against code/water_mask.R)
source(here("code/water_mask.R"))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Figure 3B - Map of Tests by MODZCTA
# Shared compact-map theme reused by the small map panels.
theme_smallmaps <- theme(legend.title = element_text(face = "bold", size = 9),
plot.title = element_text(size = 9.5),
panel.background = element_rect(fill = "#cccccc"),
panel.grid = element_blank(),
legend.background = element_rect(fill = "transparent"),
legend.position = c(0.22, 0.80),
legend.text = element_text(size = 8.5),
plot.margin = unit(c(4,0,4,0), "pt"),
legend.key.size = unit(1.1, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
axis.title = element_blank())
# Choropleth of cumulative positives per 100,000 by MODZCTA; "99999" is the
# non-geographic catch-all code and is dropped.
fig3b <- MODZCTA_NYC_shp1 %>%
left_join(., May7_tests, by = "zcta") %>%
left_join(., ACS_Data2, by = "zcta") %>%
filter(zcta != "99999") %>%
mutate(pos_per_100000 = (Positive/total_pop1)*100000) %>%
ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(aes(fill = pos_per_100000), lwd = 0.2)+
scalebar(MODZCTA_NYC_shp1, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5,
anchor = c(x = -73.71, y = 40.51)) +
labs(fill = "Positives per 100,000") +
ggtitle("Cumulative positive COVID tests by zip code (May 7, 2020)") +
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
coord_sf(crs = st_crs(MODZCTA_NYC_shp1),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
theme_smallmaps
#### Create data frames of all above information ####
# Master ZCTA-level analysis table: MODZCTA geography joined to ACS
# demographics, May 7 test counts, residential volume, essential-worker
# commuting, and grocer counts, with derived densities and rates.
# NOTE(review): the constant .0293 in one_over_grocers_per_1000 appears to
# be the minimum observed nonzero grocers_per_1000, so (1/.0293)+1 places
# zero-grocer ZCTAs just above the observed maximum of the inverse -- TODO
# confirm the constant against the data.
ZCTA_ACS_COVID_shp <- MODZCTA_NYC_shp1 %>%
st_transform(., crs = 2263) %>%
dplyr::select(zcta, geometry) %>%
left_join(., ACS_Data2, by = "zcta") %>%
left_join(., May7_tests, by = "zcta") %>%
left_join(., Res_Bldg_Footprints, by = "zcta") %>%
left_join(., ACS_EssentialWrkr_Commute1, by = "zcta") %>%
left_join(., zcta_grocers, by = c("zcta" = "modzcta")) %>%
mutate(pop_density = as.numeric(total_pop1/st_area(geometry)),
avg_hhold_size = round((total_pop1/total_hholds1), 2),
pos_per_100000 = (Positive/total_pop1)*100000,
testing_ratio = (total_tests/total_pop1),
res_vol_zctadensity = as.numeric(total_res_volume_zcta/st_area(geometry)),
res_vol_popdensity = as.numeric(total_pop1/total_res_volume_zcta),
pubtrans_ferrysubway_commute = pubtrans_subway_commute + pubtrans_ferry_commute,
grocers = replace_na(grocers, 0),
grocers_per_1000 = (grocers/total_pop1)*1000,
pos_per_100000 = round(pos_per_100000, 0),
valid_var = "0",
one_over_grocers_per_1000 = if_else(is.infinite(1/grocers_per_1000), (1/.0293)+1, 1/grocers_per_1000), #max + 1 for zero values
didnot_workhome_commute = 100 - workhome_commute,
one_over_medincome = 1/medincome) %>%
dplyr::select(-pubtrans_subway_commute, -pubtrans_ferry_commute) %>%
left_join(., ZCTA_by_boro1, by = "zcta") %>%
mutate_at(vars(starts_with("essentialworker_")), ~round((./over16total_industry1)*100, 2)) %>%
filter(zcta != "99999") #remove na
# Plain data frame version (geometry dropped) used for modeling below.
ZCTA_ACS_COVID <- ZCTA_ACS_COVID_shp %>%
st_set_geometry(., NULL) #remove geometry
# Tract-level analogue of the ZCTA table above: same derived variables,
# built from tract ACS data, tract residential volume, and tract grocers.
tract_vars <- tractSF %>% # uses local CRS
left_join(., st_set_geometry(acs_tracts2, NULL), by = "GEOID") %>%
left_join(., acs_tracts_commute1, by = "GEOID") %>%
left_join(., res_bldg_tract_sum, by = "GEOID") %>%
left_join(., tract_grocers, by = "GEOID") %>%
mutate(pop_density = as.numeric(total_pop1/st_area(geometry)),
avg_hhold_size = round((total_pop1/total_hholds1), 2),
res_vol_tractdensity = as.numeric(total_res_volume_tract/st_area(geometry)),
res_vol_popdensity = as.numeric(total_pop1/total_res_volume_tract),
pubtrans_ferrysubway_commute = pubtrans_subway_commute + pubtrans_ferry_commute,
grocers = replace_na(grocers, 0),
grocers_per_1000 = (grocers/total_pop1)*1000,
valid_var = "0",
didnot_workhome_commute = 100 - workhome_commute,
# same zero-replacement constant as the ZCTA table (see note there)
one_over_grocers_per_1000 = if_else(is.infinite(1/grocers_per_1000), (1/.0293)+1, 1/grocers_per_1000),
one_over_medincome = 1/medincome) %>%
dplyr::select(-pubtrans_subway_commute, -pubtrans_ferry_commute) %>%
mutate_at(vars(starts_with("essentialworker_")), ~round((./over16total_industry1)*100, 2))
#### Tract to ZCTA weighted assignment ####
# Shares of residential addresses in ZCTAs by Tract are later used to select
# representative tract-level SES values at a specified location on the ECDF
modzcta_to_zcta_chr <- data.frame(ZCTA = as.character(modzcta_to_zcta$ZCTA),
MODZCTA = as.character(modzcta_to_zcta$MODZCTA))
# Calculate the proportion of population each combined ZCTA contributes to MODZCTA
modzcta_to_zcta_pop <- modzcta_to_zcta_chr %>%
left_join(ACS_Data_scaled[, c("GEOID", "total_pop1")], by = c("ZCTA" = "GEOID")) %>%
group_by(MODZCTA) %>%
mutate(sum_pop = sum(total_pop1)) %>%
ungroup() %>%
mutate(pop_prop = total_pop1/sum_pop) %>%
arrange(MODZCTA)
# Sum ZCTA->Tract RES_RATIO weights when multiple ZCTA combine to a single MODZCTA
modzcta_to_tract <- modzcta_to_zcta_pop %>%
filter(MODZCTA != "99999") %>%
# Weights of each ZCTA are scaled by their proportion of MODZCTA population
left_join(zip_to_tract, by = c("ZCTA" = "ZIP")) %>%
mutate(scaled_res_ratio = RES_RATIO * pop_prop) %>%
# note: MODZCTA 10001 & 10007 contain a ZCTA with a small share of population
# but a zero share of RES_RATIO. This makes their sum of scaled_res_ratio by
# MODZCTA less than 1, but this will not affect the weighted median or 3Q values
# by tract.
group_by(MODZCTA, TRACT) %>%
dplyr::summarize(SUM_RES_RATIO = sum(scaled_res_ratio), .groups = "drop") %>%
# Weights are then multiplied by the population density of each tract
left_join(tract_vars, by = c("TRACT" = "GEOID")) %>%
dplyr::select(MODZCTA, TRACT, SUM_RES_RATIO, total_pop1, total_hholds1) %>%
filter(total_hholds1 > 0) %>%
# NOTE(review): despite the name, tract_popdens is persons per household
# (total_pop1/total_hholds1), not population per unit area -- confirm this
# is the intended weighting quantity.
mutate(tract_popdens = total_pop1/total_hholds1) %>%
mutate(W_RES_RATIO = SUM_RES_RATIO * tract_popdens) %>%
dplyr::select(MODZCTA, TRACT, W_RES_RATIO)
modzcta_to_tract
## # A tibble: 3,020 x 3
## MODZCTA TRACT W_RES_RATIO
## <chr> <chr> <dbl>
## 1 10001 36061005600 0.0307
## 2 10001 36061005800 0.0648
## 3 10001 36061007100 0
## 4 10001 36061007600 0.250
## 5 10001 36061008400 0.00422
## 6 10001 36061009100 0.154
## 7 10001 36061009300 0.145
## 8 10001 36061009500 0.231
## 9 10001 36061009700 0.250
## 10 10001 36061009900 0.253
## # … with 3,010 more rows
# length(unique(modzcta_to_tract$MODZCTA)) # 177
# length(unique(modzcta_to_tract$TRACT)) # 2111
# length(unique(tractSF$GEOID)) # 2167
# check res_ratio against tracts with no population
# Attach tract population to the weights for the sanity checks below.
tract_modzcta_pop <- modzcta_to_tract %>%
left_join(acs_tracts2, by = c("TRACT" = "GEOID")) %>%
dplyr::select(MODZCTA, TRACT, total_pop1, W_RES_RATIO)
# checking for tracts with no population but res_ratio > 0
# (not possible now that the weighted ratio uses population)
#tract_modzcta_pop %>% filter(total_pop1 == 0 & W_RES_RATIO > 0) %>% nrow() # 0
# check to make sure no MODZCTA have all zeroes for all tract res_ratios
all_zero_ratio <- tract_modzcta_pop %>% group_by(MODZCTA) %>% filter(all(W_RES_RATIO == 0))
if(nrow(all_zero_ratio) > 0){
warning("The following MODZCTA have all zeroes for weighted tract residents:\n ",
paste(all_zero_ratio$MODZCTA, collapse = ","))
}
# Final MODZCTA -> tract weight table used downstream.
modzcta_to_tract2 <- dplyr::select(tract_modzcta_pop, -total_pop1)
### Preparation done --- Now for the Analysis ###
#### Part 1: Creation of BWQS Neighborhood Infection Risk Scores ####
# Step 1: Create univariate scatterplots with negative binomial linear smoothers
# to make sure direction of associations are consistent for all variables
# Visualize the non-linear relationship between infection rate and test_ratio (not adjusted for social inequity)
# infections versus testing ratio with smoothers; this covariate is not quantiled so the shape of association is important
# Three smoothers are overlaid: NB GLM (red), NB GAM spline (green), and an
# NB GLM with a 3-df natural spline (blue/default).
ggplot(ZCTA_ACS_COVID, aes(x = testing_ratio, y = pos_per_100000)) + geom_point() +
ylab("infections per 100,000") +
geom_smooth(color = "red", formula = y ~ x, method = "glm.nb") +
stat_smooth(color = "green", method = "gam", formula = y ~ s(x), method.args = list(family = "nb"), se = F) +
geom_smooth(method = "glm.nb", formula = y ~ splines::ns(x, 3), se = FALSE) + theme_bw()
# 10 social variables
# Template plot; `%+% aes(x = ...)` below swaps in each candidate predictor
# as the x aesthetic without rebuilding the rest of the plot.
plotsocial <- ggplot(ZCTA_ACS_COVID, aes(y = pos_per_100000)) +
geom_point() + geom_smooth(color = "red", formula = y ~ x, method = "glm.nb") + ylab("infections per 100,000") + theme_bw()
plotsocial %+% aes(x = one_over_medincome)
plotsocial %+% aes(x = not_insured)
plotsocial %+% aes(x = one_over_grocers_per_1000)
plotsocial %+% aes(x = unemployed)
plotsocial %+% aes(x = res_vol_popdensity)
plotsocial %+% aes(x = didnot_workhome_commute)
plotsocial %+% aes(x = not_quarantined_jobs)
plotsocial %+% aes(x = avg_hhold_size)
plotsocial %+% aes(x = essentialworker_drove)
plotsocial %+% aes(x = essentialworker_pubtrans)
# The 10 mixture components entering the BWQS model, in a fixed order that
# later code (weights, labels) relies on.
SES_vars <- names(ZCTA_ACS_COVID %>% dplyr::select(one_over_medincome, not_insured, one_over_grocers_per_1000, unemployed,
not_quarantined_jobs, essentialworker_pubtrans, essentialworker_drove,
didnot_workhome_commute, res_vol_popdensity, avg_hhold_size))
# Step 2a: Examine relationships between explanatory variables to make sure nothing >0.9 correlation, as this could bias BWQS
Cors_SESVars <- cor(x = ZCTA_ACS_COVID %>% dplyr::select(all_of(SES_vars)), method = "kendall")
# Long-format pairwise correlations for eyeballing the largest values.
Cors_SESVars1 <- as.data.frame(Cors_SESVars)
Cors_SESVars1$var1 <- row.names(Cors_SESVars1)
Cors_SESVars2 <- gather(data = Cors_SESVars1, key = "var2", value = "correlation", -var1) %>%
filter(var1 != var2)
Cors_SESVars2 %>% arrange(desc(correlation)) %>% distinct(correlation, .keep_all = T)
## var1 var2 correlation
## 1 essentialworker_drove not_quarantined_jobs 0.6121033
## 2 essentialworker_pubtrans one_over_medincome 0.5432472
## 3 unemployed one_over_medincome 0.5341512
## 4 not_insured one_over_medincome 0.5270258
## 5 essentialworker_pubtrans unemployed 0.5243542
## 6 didnot_workhome_commute essentialworker_drove 0.5086620
## 7 avg_hhold_size not_quarantined_jobs 0.4787113
## 8 didnot_workhome_commute not_quarantined_jobs 0.4763680
## 9 avg_hhold_size didnot_workhome_commute 0.4598744
## 10 res_vol_popdensity one_over_medincome 0.4549307
## 11 res_vol_popdensity essentialworker_pubtrans 0.4507468
## 12 res_vol_popdensity not_insured 0.4391560
## 13 avg_hhold_size essentialworker_drove 0.4198843
## [ reached 'max' / getOption("max.print") -- omitted 32 rows ]
if(export.figs) {
png(filename = file.path(fig.path, paste0("sfig1_", Sys.Date(), ".png")), width = 96*7, height = 96*7)
# passing the correlation matrix as p.mat with sig.level = -1 and
# insig = "p-value" co-opts corrplot's p-value display to print the
# correlation values themselves on the plot
corrplot(Cors_SESVars, p.mat = Cors_SESVars, insig = "p-value", type = "lower",
sig.level = -1, tl.col = "black", tl.srt = 45)
dev.off()
}
# Step 2b: Examine Univariable kendall associations for all selected variables with the outcome
# Rewritten to compute tau and p-values directly with vapply() instead of
# the original bind_cols()/t()/as_tibble() construction, which depended on
# tibble's name-repair fallback columns (V1...2 / V1...3) and emitted
# deprecation warnings. Output content is unchanged.
kendall_tau <- vapply(
  SES_vars,
  function(v) cor(ZCTA_ACS_COVID[[v]], ZCTA_ACS_COVID$pos_per_100000, method = "kendall"),
  numeric(1))
kendall_p <- vapply(
  SES_vars,
  function(v) cor.test(ZCTA_ACS_COVID[[v]], ZCTA_ACS_COVID$pos_per_100000, method = "kendall")$p.value,
  numeric(1))
tibble(Variables = SES_vars,
       `Correlation (Tau)` = round(kendall_tau, 3),
       `p value` = ifelse(kendall_p < 0.0001, "< 0.0001", as.character(round(kendall_p, 3))))
## # A tibble: 10 x 3
## Variables `Correlation (Tau)` `p value`
## <chr> <dbl> <chr>
## 1 one_over_medincome 0.322 < 0.0001
## 2 not_insured 0.236 < 0.0001
## 3 one_over_grocers_per_1000 0.268 < 0.0001
## 4 unemployed 0.294 < 0.0001
## 5 not_quarantined_jobs 0.542 < 0.0001
## 6 essentialworker_pubtrans 0.175 0.001
## 7 essentialworker_drove 0.483 < 0.0001
## 8 didnot_workhome_commute 0.453 < 0.0001
## 9 res_vol_popdensity 0.106 0.035
## 10 avg_hhold_size 0.463 < 0.0001
# Step 3: Prepare data for BWQS and pass to stan for model fitting
# Fit the BWQS Stan model: single chain, 2500 warmup of 20000 iterations,
# thinned by 10 => 1750 retained posterior draws (the 1750 referenced in
# diagnostics below). pm() presumably caches the fitted object so re-runs
# skip sampling -- confirm against Just.universal.
# NOTE(review): adapt_delta is effectively 1 and max_treedepth = 20, which
# makes adaptation extremely conservative (slow sampling) -- presumably
# deliberate to eliminate divergent transitions; confirm before changing.
pm(fit_BWQS_model <- function(data_list, stan_model_path){
fitted_model <- stan(
file = stan_model_path,
data = data_list,
chains = 1,
warmup = 2500,
iter = 20000,
thin = 10,
refresh = 0,
algorithm = "NUTS",
seed = 1234,
control = list(max_treedepth = 20,
adapt_delta = 0.999999999999999))
return(fitted_model)
})
# Bayesian R^2 for the negative-binomial BWQS fit: per posterior draw,
# the variance of the fitted means divided by (fitted variance + residual
# variance), where residual variance uses the NB2 mean-variance relation
# mu + mu^2/phi. Returns posterior mean, median, and 95% interval of R^2.
Compute_Bayes_R2 <- function(fit) {
  mu_draws <- exp(extract(fit, "eta")$eta)   # fitted means, one row per draw
  fit_variance <- apply(mu_draws, 1, var)    # explained variance per draw
  fit_mean <- apply(mu_draws, 1, mean)
  dispersion <- extract(fit, "phi")$phi
  resid_variance <- fit_mean + (fit_mean^2) / dispersion
  r2_draws <- fit_variance / (fit_variance + resid_variance)
  list(meanr2 = mean(r2_draws),
       medianr2 = median(r2_draws),
       lowerr2 = quantile(r2_draws, .025),
       upperr2 = quantile(r2_draws, .975))
}
# Assemble the data list expected by the BWQS Stan model.
#
# df:           data frame holding pos_per_100000, testing_ratio, and the
#               SES mixture columns.
# ses_varnames: names of the exposure-mixture columns.
#
# Each SES variable is passed through its own empirical CDF and scaled to
# 0-10 (decile-style scores); testing_ratio enters as a 3-df natural spline
# basis. Returns the Stan data list plus the transformed X and the spline
# basis K.
prep_BWQS_data <- function(df, ses_varnames) {
  outcome <- as.numeric(df$pos_per_100000)
  X <- df %>%
    dplyr::select(all_of(ses_varnames))
  K <- ns(df$testing_ratio, df = 3)
  # ECDF transform each mixture column onto a 0-10 scale
  X[ses_varnames] <- lapply(X[ses_varnames], function(v) ecdf(v)(v) * 10)
  n_obs <- length(outcome)
  data_list <- list(N = n_obs,
                    N_new = n_obs,          # predictions generated for the same observations
                    C = NCOL(X),
                    K = NCOL(K),
                    XC = as.matrix(X),
                    XK = cbind(K),
                    XC_new = as.matrix(X),
                    XK_new = cbind(K),
                    Dalp = rep(1, length(ses_varnames)),  # flat Dirichlet prior on weights
                    y = outcome)
  list(data_list = data_list, X = X, K = K)
}
m1data <- prep_BWQS_data(ZCTA_ACS_COVID, SES_vars)
t_dataprep = Sys.time()
# fit our primary model -- negative binomial
m1 <- fit_BWQS_model(m1data$data_list, BWQS_stan_model)
t_m1 = Sys.time()
# print m1 for model diagnostics (n_eff as % of 1750, Rhat, trace plots, acf)
# m1
# NOTE(review): the printed value below is ~0.62, i.e. worst-case effective
# sample size is ~62% of the 1750 retained draws (an earlier comment said
# 78%; the output disagrees).
min(summary(m1)$summary[,"n_eff"]/1750)
## [1] 0.6186883
# MCMC diagnostics for the BWQS coefficient and the mixture weights
traceplot(m1, pars = c("beta1", "W"))
stan_ac(m1, pars = c("beta1", "W"))
## Warning: Ignoring unknown parameters: fun.y
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
# WAIC / LOO information criteria for the fitted model
extract_waic(m1)
## $waic
## waic
## 2369.804
##
## $elpd_waic
## elpd_waic
## -1184.902
##
## $p_waic
## p_waic
## 14.69814
##
## $elpd_loo
## elpd_loo
## -1184.963
##
## $p_loo
## p_loo
## 14.75942
# Bayesian R^2 (mean and 95% interval) -- see Compute_Bayes_R2 above
round(Compute_Bayes_R2(m1)$meanr2, 2)
## [1] 0.93
round(Compute_Bayes_R2(m1)$lowerr2, 2)
## 2.5%
## 0.92
round(Compute_Bayes_R2(m1)$upperr2, 2)
## 97.5%
## 0.95
# residual analysis with DHARMa
# Posterior predictive draws (iterations x observations) are transposed to
# DHARMa's expected (observations x simulations) layout.
residuals_qq <- createDHARMa(simulatedResponse = t(extract(m1,"y_new")$y_new), observedResponse = m1data$data_list$y)
## No fitted predicted response provided, using the mean of the simulations
#plotQQunif(residuals_qq, testOutliers = F, testDispersion = F) #base graphics
# Kolmogorov-Smirnov test of the scaled residuals against uniformity
ks_unif <- testUniformity(residuals_qq)
# gap::qqunif draws a base-graphics QQ plot and returns the plotted
# coordinates, which are re-drawn with ggplot below for the supplement
residuals_qq_unif <- gap::qqunif(residuals_qq$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1)
sfig3_qq <- ggplot(data.frame(x = residuals_qq_unif$x, y = residuals_qq_unif$y)) +
  geom_abline(linetype = "dashed", color = "blue") +
  scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  geom_point(aes(x, y), size = 0.8) +
  annotate("text", x = 0.02, y = 0.9, hjust = 0, size = 3.2,
           label = paste0("Kolmogorov-Smirnov test for\ncomparison of distributions: p=",
                          round(ks_unif$p.value, 2))) +
  coord_equal(ratio = 1) +
  labs(x = "Expected", y = "Observed") +
  theme_bw() + theme(plot.margin = unit(c(5.5, 11, 5.5, 5.5), "points"))
# FIX(review): pass the plot explicitly. ggsave() defaults to
# ggplot2::last_plot(), but sfig3_qq was assigned without being printed, so
# the default would save whichever ggplot was displayed last (or fail) --
# not this figure.
if(export.figs) ggsave(sfig3_qq, filename = file.path(fig.path, paste0("sfig3_", Sys.Date(), ".png")), width = 3.5, height = 3.4)
# examine parameter estimates
# Posterior-mean BWQS coefficient on the rate-ratio scale.
exp(mean(extract(m1, "beta1")$beta1))
## [1] 1.076975
# Parameter labels in the order they appear in summary(m1): overdispersion,
# intercept, BWQS coefficient, 3 testing-spline terms, then the 10 weights.
vars = c("phi", "beta0", "beta1", paste0("delta", 1:3), SES_vars)
parameters_to_drop <- c("phi", paste0("delta", 1:3), "beta0", "beta1")
number_of_coefficients <- length(vars)
# Columns kept from the Stan summary: mean, 2.5%, 50%, 97.5%, n_eff, Rhat.
BWQS_params <- bind_cols(as_tibble(summary(m1)$summary[1:number_of_coefficients,c(1,4,6,8:10)]), label = vars)
# Exponentiate beta1's point estimate and full interval to the rate-ratio
# scale.
# FIX(review): the original used vars(1:3), which exponentiated mean, 2.5%,
# and 50% but left the 97.5% column on the log scale -- it printed as 0.0855
# next to a 2.5% bound of 1.06. vars(1:4) covers all four summary columns.
BWQS_params %>% filter(label == "beta1") %>% mutate_at(vars(1:4), ~exp(.))
## # A tibble: 1 x 7
## mean `2.5%` `50%` `97.5%` n_eff Rhat label
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1.08 1.06 1.08 1.09 1631. 1.00 beta1
# Mixture-weight estimates for plotting, with each component assigned to a
# thematic panel for the facetted figure.
BWQS_fits <- BWQS_params %>%
  rename(lower = "2.5%",
         upper = "97.5%") %>%
  filter(!label %in% parameters_to_drop) %>%
  arrange(desc(mean)) %>%
  # case_when replaces the original triple-nested if_else chain; the
  # assignments and factor levels are identical.
  mutate(group = factor(case_when(
    label %in% c("one_over_medincome", "not_insured", "unemployed") ~ "Finances &\nAccess to care",
    label == "one_over_grocers_per_1000" ~ "Food\nAccess",
    str_detect(label, "essential") | label %in% c("not_quarantined_jobs", "didnot_workhome_commute") ~ "Commuting and\nEssential Work",
    label %in% c("avg_hhold_size", "res_vol_popdensity") ~ "Population\nDensity",
    TRUE ~ "Unmatched"),
    levels = c("Commuting and\nEssential Work", "Finances &\nAccess to care", "Population\nDensity", "Food\nAccess")))
# Display labels for the 10 mixture components (axis labels in fig2).
labels1 <- c("one_over_medincome" = "1/\nMedian income",
             "not_insured" = "Uninsured",
             "unemployed" = "Unemployed",
             "one_over_grocers_per_1000" = "1/\nGrocers per 1000",
             "not_quarantined_jobs" = "Essential Workers",
             "essentialworker_pubtrans" = "Essential Worker:\nPublic Transit",
             "essentialworker_drove" = "Essential Worker:\nDriving Commute",
             "didnot_workhome_commute" = "1/\nWork from home",
             "res_vol_popdensity" = "Population Density\nby Residential Volume",
             "avg_hhold_size" = "Average people\nper household")
# Labels for the non-weight parameters, used in the supplemental table.
labels2 <- c("phi" = "Overdispersion",
             "beta0" = "Intercept",
             "beta1" = "BWQS term",
             "delta1" = "Testing ratio: spline term 1",
             "delta2" = "Testing ratio: spline term 2",
             "delta3" = "Testing ratio: spline term 3",
             labels1)
# Supplemental Table 2: Parameter estimates, credible intervals, and diagnostics from main BWQS infections model
# Formats BWQS_params for display: rounds estimates, collapses the 2.5%/97.5%
# columns into a "(lower, upper)" string, relabels terms, and renders with
# kableExtra.
BWQS_params %>% bind_cols(., "terms" = labels2) %>%
dplyr::select(-label) %>%
mutate_at(vars(1:6), ~round(., 3)) %>%
mutate_at(vars(5), ~round(., 0)) %>%
mutate(`95% CrI`=paste0("(",format(`2.5%`, nsmall=3),", ",format(`97.5%`, nsmall=3),")")) %>%
mutate(terms =
case_when(terms=="BWQS term" ~ "COVID-19 inequity index",
TRUE ~ terms)) %>%
dplyr::select(-`2.5%`, -`97.5%`) %>%
rename(median=`50%`) %>%
dplyr::select(terms, mean, `95% CrI`, median, Rhat, n_eff) %>%
kbl(align=rep('r', 6), font_size = 9.5) %>%
kable_classic(full_width = F, html_font = "Arial")
| terms | mean | 95% CrI | median | Rhat | n_eff |
|---|---|---|---|---|---|
| Overdispersion | 104.271 | (81.793, 129.364) | 103.650 | 1.000 | 1730 |
| Intercept | 6.261 | ( 6.177, 6.344) | 6.261 | 0.999 | 1663 |
| COVID-19 inequity index | 0.074 | ( 0.063, 0.086) | 0.074 | 1.001 | 1631 |
| Testing ratio: spline term 1 | 0.983 | ( 0.893, 1.072) | 0.985 | 1.000 | 1781 |
| Testing ratio: spline term 2 | 2.029 | ( 1.821, 2.244) | 2.025 | 1.000 | 1539 |
| Testing ratio: spline term 3 | 1.121 | ( 1.002, 1.233) | 1.121 | 0.999 | 1546 |
| 1/ Median income | 0.105 | ( 0.009, 0.231) | 0.100 | 1.000 | 1638 |
| Uninsured | 0.177 | ( 0.056, 0.302) | 0.176 | 1.000 | 1505 |
| Unemployed | 0.027 | ( 0.001, 0.076) | 0.022 | 1.000 | 1672 |
| 1/ Grocers per 1000 | 0.055 | ( 0.003, 0.143) | 0.049 | 1.000 | 1660 |
| Essential Workers | 0.062 | ( 0.002, 0.173) | 0.052 | 0.999 | 1528 |
| Essential Worker: Public Transit | 0.050 | ( 0.002, 0.138) | 0.042 | 0.999 | 1771 |
| Essential Worker: Driving Commute | 0.166 | ( 0.035, 0.289) | 0.167 | 1.000 | 1790 |
| 1/ Work from home | 0.089 | ( 0.009, 0.193) | 0.086 | 1.000 | 1706 |
| Population Density by Residential Volume | 0.108 | ( 0.013, 0.210) | 0.107 | 0.999 | 1798 |
| Average people per household | 0.161 | ( 0.032, 0.297) | 0.158 | 0.999 | 1715 |
# create a figure with parameter estimates for the weights
# FIX(review): the original chain added `theme_set(theme_bw(base_size = 18))`
# as a plot layer. theme_set() returns the *previously active* theme (so the
# old theme was what got added to this plot) and mutates the global default
# theme as a side effect. Applying theme_bw() to the plot directly expresses
# the intent without global state; any later plot that silently relied on
# the global side effect should set its theme explicitly.
fig2 <- ggplot(data=BWQS_fits, aes(x= reorder(label, mean), y=mean, ymin=lower, ymax=upper)) +
  geom_pointrange() +
  coord_flip() +
  xlab("") +
  ylab("Mean (95% credible intervals)") +
  scale_x_discrete(labels = labels1) +
  theme_bw(base_size = 18) +
  facet_grid(group~., scales = "free", space = "free") +
  theme(strip.text.x = element_text(size = 14))
if(export.figs) ggsave(fig2, filename = file.path(fig.path, paste0("fig2", "_", Sys.Date(),".png")),
                       dpi = 300, width = 8, height = 8)
# Step 4: Construct the summative COVID-19 inequity index value for each ZCTA
# Use the variable-specific weight on the quantiled splits to create a 10 point ZCTA-level infection risk score
# Posterior-mean mixture weights W: the last length(SES_vars) rows of the
# kept summary block, in the same column order as m1data$X.
BWQS_weights <- as.numeric(summary(m1)$summary[(length(vars)-length(SES_vars) + 1):number_of_coefficients,c(1)])
# Scale each ECDF-transformed column by its weight.
# FIX(review): the original indexed with BWQS_weights[col(ZCTA_ACS_COVID)],
# building the column-index matrix from the *full* analysis data frame
# (dozens of columns, so indices past 10 yielded NA) rather than from the
# 10-column m1data$X. sweep() aligns each weight with its own column
# explicitly and is robust to either object's width.
ZCTA_ACS_COVID2 <- as.data.frame(sweep(as.matrix(m1data$X), 2, BWQS_weights, `*`))
# Index = weighted sum across the 10 components (0-10 scale per component).
BWQS_index <- ZCTA_ACS_COVID2 %>%
  dplyr::mutate(BWQS_index = rowSums(.)) %>%
  dplyr::select(BWQS_index)
# Visualize the relationship between BWQS and test_ratio
ggplot(data.frame(BWQS_index = BWQS_index$BWQS_index, testing_ratio = ZCTA_ACS_COVID$testing_ratio), aes(testing_ratio, BWQS_index)) + geom_point() +
geom_smooth() + geom_smooth(color = "red", method = "lm")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# BWQS is correlated with testing_ratio
cor.test(BWQS_index$BWQS_index, ZCTA_ACS_COVID$testing_ratio, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: BWQS_index$BWQS_index and ZCTA_ACS_COVID$testing_ratio
## S = 472628, p-value = 1.969e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4885952
# Partial regression plot for BWQS index
# shows a nice linear relationship of BWQS index with infections after adjustment for testing
# Regress outcome and index separately on the testing-ratio spline basis,
# then relate the two sets of residuals (added-variable plot).
nb_testing_ns3df <- glm.nb(m1data$data_list$y ~ m1data$K)
yresid <- resid(nb_testing_ns3df)
bwqs_testing_ns3df <- lm(BWQS_index$BWQS_index ~ m1data$K)
bwqsresid <- resid(bwqs_testing_ns3df)
ggplot(data.frame(yresid, bwqsresid), aes(bwqsresid, yresid)) + geom_point() +
geom_smooth(formula = y ~ x, method = "lm", se = F) +
ylab("residual log infection rate\n(adjusted for testing)") + xlab("residual BWQS infection risk index\n(adjusted for testing)")
summary(lm(yresid ~ bwqsresid))
##
## Call:
## lm(formula = yresid ~ bwqsresid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.61379 -0.36053 0.01232 0.32793 1.95081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05117 0.04794 -1.067 0.287
## bwqsresid 0.48714 0.03001 16.230 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6378 on 175 degrees of freedom
## Multiple R-squared: 0.6008, Adjusted R-squared: 0.5986
## F-statistic: 263.4 on 1 and 175 DF, p-value: < 2.2e-16
rm(nb_testing_ns3df, yresid, bwqs_testing_ns3df, bwqsresid)
# Visualize the full model observed vs predicted -- shows a close relationship (which is why R2 is so high)
# Posterior-mean prediction per ZCTA against the observed rate.
preddf <- data.frame(data.frame(pred = colMeans(extract(m1,"y_new")$y_new), y = m1data$data_list$y))
ggplot(preddf, aes(pred, y)) +
geom_point() +
geom_abline(linetype = "dashed", color = "grey10") +
coord_fixed(ratio = 1) +
scale_x_continuous("Predicted Infections per 100,000", label=comma, limits = range(preddf)) +
scale_y_continuous("Infections per 100,000", label=comma) +
theme_bw()
rm(preddf)
# calculate credible interval over the mean predicted infections
# at the median testing_ratio
# One row per posterior draw: intercept, BWQS coefficient, and the three
# testing-spline coefficients.
sim_out <- data.frame(extract(m1, pars = c("beta0", "beta1", "delta")))
# Spline basis row evaluated at the median testing ratio (1 x 3).
median_testing <- predict(m1data$K, median(ZCTA_ACS_COVID$testing_ratio))
# calculate term for median testing_rate
sim_out$deltamedian <- with(sim_out, median_testing[1] * delta.1 +
median_testing[2] * delta.2 +
median_testing[3] * delta.3)
# make a sequence of BWQS values
xseqlength <- 300
bwqs_seq <- seq(from = min(BWQS_index$BWQS_index),
to = max(BWQS_index$BWQS_index),
length.out = xseqlength)
# Linear predictor on the log scale: draws (rows) x grid points (columns)
# via outer products; exponentiated below for the response scale.
sim_matrix <- sim_out$beta0 %o% rep(1, xseqlength) +
sim_out$beta1 %o% bwqs_seq +
sim_out$deltamedian%o% rep(1, xseqlength)
sim_bwqs_df <- data.frame(bwqs_seq,
lower = exp(colQuantiles(sim_matrix, probs = 0.025)),
upper = exp(colQuantiles(sim_matrix, probs = 0.975)),
mean = exp(colMeans(sim_matrix)))
rm(median_testing, xseqlength, bwqs_seq, sim_matrix)
# calculate credible interval over the mean predicted infections
# at the median COVID-19 inequity index
sim_out$beta1median <- sim_out$beta1 * median(BWQS_index$BWQS_index)
# make a sequence of testing_ratio values
xseqlength <- 300
inequity_seq <- data.frame(x = seq(from = min(ZCTA_ACS_COVID$testing_ratio),
to = max(ZCTA_ACS_COVID$testing_ratio),
length.out = xseqlength))
# Evaluate the 3-column spline basis at each grid value (columns X1..X3).
inequity_seq <- data.frame(x = inequity_seq$x, t(sapply(inequity_seq$x, FUN = function(x) predict(m1data$K, x))))
sim_matrix <-
sim_out$beta0 %o% rep(1, xseqlength) +
sim_out$beta1median %o% rep(1, xseqlength) +
sim_out$delta.1 %o% inequity_seq$X1 +
sim_out$delta.2 %o% inequity_seq$X2 +
sim_out$delta.3 %o% inequity_seq$X3
sim_testing_df <- data.frame(x = inequity_seq$x,
lower = exp(colQuantiles(sim_matrix, probs = 0.025)),
upper = exp(colQuantiles(sim_matrix, probs = 0.975)),
mean = exp(colMeans(sim_matrix)))
rm(sim_out, xseqlength, inequity_seq, sim_matrix)
# Visualize the relationship between BWQS index and infection rate at the median testing_ratio
# Fitted curve + 95% credible ribbon from sim_bwqs_df, with observed ZCTAs
# overlaid and marginal histograms added via ggExtra.
BWQS_scatter <- ggplot(sim_bwqs_df, aes(x = bwqs_seq)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'grey75') +
geom_line(aes(y = mean)) +
geom_point(data = data.frame(BWQS_index, y = m1data$data_list$y), aes(BWQS_index, y), alpha = 0.5) +
scale_x_continuous("COVID-19 inequity index") +
scale_y_continuous("Infections per 100,000", label=comma)
BWQS_scatter <- ggExtra::ggMarginal(BWQS_scatter, type = "histogram", fill = "grey40", margins = "both",
xparams = list(binwidth = 0.5), yparams = list(binwidth = 200))
# ggMarginal returns a gtable, so it is print()ed inside a png device rather
# than saved with ggsave().
if(export.figs) {
png(filename = file.path(fig.path, paste0("fig1_", Sys.Date(), ".png")), width = 96*5, height = 96*5)
print(BWQS_scatter)
dev.off()
}
## png
## 2
# Visualize the relationship between testing_ratio and infection rate at the median BWQS
testing_scatter <- ggplot(sim_testing_df, aes(x = x)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'grey75') +
geom_line(aes(y = mean)) +
geom_point(data = data.frame(testing_ratio = ZCTA_ACS_COVID$testing_ratio, y = m1data$data_list$y),
aes(testing_ratio, y), alpha = 0.5) +
scale_x_continuous("Testing ratio") +
scale_y_continuous("Infections per 100,000", label=comma)
testing_scatter <- ggExtra::ggMarginal(testing_scatter, type = "histogram", fill = "grey40", margins = "both",
xparams = list(binwidth = 0.005), yparams = list(binwidth = 200))
if(export.figs) {
png(filename = file.path(fig.path, paste0("sfig2_", Sys.Date(), ".png")), width = 96*5, height = 96*5)
print(testing_scatter)
dev.off()
}
## png
## 2
# Step 5: Visualize the spatial distribution of ZCTA-level infection risk scores
# Attach the index to the ZCTA geometry (row order is preserved from
# ZCTA_ACS_COVID_shp, which BWQS_index was computed from).
ZCTA_BWQS_COVID_shp <- ZCTA_ACS_COVID_shp %>%
bind_cols(., BWQS_index)
# reproject to WGS84 to be compatible with scalebar
ZCTA_BWQS_COVID_shp <- st_transform(ZCTA_BWQS_COVID_shp, 4326)
# Figure 3A - BWQS Index by ZCTA
fig3a <- ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(data = ZCTA_BWQS_COVID_shp, aes(fill = BWQS_index), lwd = 0.2) +
scalebar(ZCTA_BWQS_COVID_shp, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5, st.dist = 0.011,
anchor = c(x = -73.71, y = 40.49)) +
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
coord_sf(crs = st_crs(ZCTA_BWQS_COVID_shp),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
labs(fill = "COVID-19 Inequity Index") +
theme(legend.title = element_text(face = "bold", size = 9),
panel.background = element_rect(fill = "#cccccc"),
legend.background = element_rect(fill = "transparent"),
panel.grid = element_blank(),
legend.position = c(0.125, 0.90),
legend.text = element_text(size = 8.5),
legend.key.size = unit(1.1, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank())
# Step 6: Compare quantile distribution of ZCTA-level BWQS scores by the race/ethnic composition of residents
# Race/ethnicity counts aggregated from ZCTA to MODZCTA; "other" is the
# remainder after the named groups (so it absorbs multiracial and remaining
# categories). modzcta_to_zcta1 is defined earlier in the file (not shown
# in this excerpt).
Demographics <- ACS_Data_scaled %>%
dplyr::select(GEOID, ends_with("_raceethnic"), total_pop1) %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(ends_with("_raceethnic"), total_pop1), ~sum(.)) %>%
mutate(zcta = as.character(MODZCTA),
other_raceethnic = total_pop1 - (hisplat_raceethnic + nonhispLat_white_raceethnic + nonhispLat_black_raceethnic +
nonhispLat_amerindian_raceethnic + nonhispLat_asian_raceethnic))
# ZCTA -> index lookup used by the ridgeline figure and quartile tables.
ZCTA_BQWS <- ZCTA_BWQS_COVID_shp %>%
st_set_geometry(., NULL) %>%
dplyr::select(zcta, BWQS_index)
# Long format: one row per ZCTA x race/ethnicity group, with the group's
# population carried as the ridgeline weight.
Demographics_for_ridges <- Demographics %>%
left_join(., ZCTA_BQWS, by = "zcta") %>%
dplyr::select(-total_pop1) %>%
gather(key = "Race/Ethnicity", value = "Population", hisplat_raceethnic:other_raceethnic) %>%
mutate(`Race/Ethnicity` = if_else(`Race/Ethnicity`=="hisplat_raceethnic","Hispanic/Latinx",
if_else(`Race/Ethnicity`=="nonhispLat_black_raceethnic", "Black",
if_else(`Race/Ethnicity`=="nonhispLat_white_raceethnic", "White",
if_else(`Race/Ethnicity`== "nonhispLat_asian_raceethnic", "Asian", "Other")))),
`Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c( "White", "Asian", "Other","Hispanic/Latinx","Black")),
Population = as.numeric(Population))
# Population-weighted mean and median index per group.
(Demographics_for_ridges %>%
group_by(`Race/Ethnicity`) %>%
summarise(weighted.mean(BWQS_index, Population),
weightedMedian(BWQS_index, Population)))
## # A tibble: 5 x 3
## `Race/Ethnicity` `weighted.mean(BWQS_index, Po… `weightedMedian(BWQS_index, P…
## <fct> <dbl> <dbl>
## 1 White 4.34 4.71
## 2 Asian 5.48 6.20
## 3 Other 5.14 5.67
## 4 Hispanic/Latinx 6.10 6.60
## 5 Black 6.26 6.51
# Population-weighted ridgeline densities of the index by race/ethnicity.
# NOTE(review): `..density..` is the legacy stat-variable syntax (deprecated
# in newer ggplot2 in favor of after_stat(density)); left unchanged for
# compatibility with the ggplot2 version this was run under.
fig4 <- ggplot(Demographics_for_ridges,
aes(x = BWQS_index, y = `Race/Ethnicity`)) +
xlab("COVID-19 inequity index")+
geom_density_ridges(
aes(height = ..density..,
weight = Population / sum(Population)),
scale = 0.95,
stat ="density") +
scale_y_discrete(expand = c(0, 0)) +
expand_limits(y = 6) +
theme(legend.position = "none")
if(export.figs) ggsave(plot = fig4, filename = file.path(fig.path, paste0("fig4","_",Sys.Date(),".png")),
dpi = 300, device = "png", width = 8, height = 5)
# Demographic composition (race/ethnicity percentages) of ZCTAs grouped by
# BWQS inequity-index percentile, plus the citywide totals.
#
# NOTE(review): the strict inequalities below mean a ZCTA whose index equals
# exactly the 25th or 75th percentile falls into no percentile group; with a
# continuous index a tie is unlikely, but worth confirming.

# Sum demographic counts over a set of ZCTAs (the whole city when `zctas` is
# NULL) and express the *_raceethnic counts as percentages of total_pop1.
# Replaces four copies of the same pipeline that used the defunct funs()
# helper with summarise_all()/mutate_at(); across() is behavior-identical.
summarise_race_ethnicity <- function(demographics, zctas = NULL, group_label) {
  if (!is.null(zctas)) {
    demographics <- filter(demographics, zcta %in% zctas)
  }
  demographics %>%
    dplyr::select(-zcta, -MODZCTA) %>%
    summarise(across(everything(), sum)) %>%
    mutate(across(ends_with("_raceethnic"),
                  ~ round((.x / total_pop1) * 100, 2))) %>%
    mutate(Group = group_label)
}

Below_25th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index < quantile(BWQS_index, .25))
Race_Ethncity_below25th <- summarise_race_ethnicity(
  Demographics, Below_25th_zctas$zcta, "Below 25th percentile BWQS")

Between_25thand75th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index > quantile(BWQS_index, .25) & BWQS_index < quantile(BWQS_index, .75))
Race_Ethncity_btw_25th75th <- summarise_race_ethnicity(
  Demographics, Between_25thand75th_zctas$zcta, "Between 25-75th percentile BWQS")

Above_75th_zctas <- ZCTA_BQWS %>%
  filter(BWQS_index > quantile(BWQS_index, .75))
Race_Ethncity_above_75th <- summarise_race_ethnicity(
  Demographics, Above_75th_zctas$zcta, "Above 75th percentile BWQS")

# Citywide reference row (no ZCTA filter).
Race_Ethncity_all <- summarise_race_ethnicity(Demographics, NULL, "NYC Population")
# Combine the citywide and BWQS-percentile-group summaries into one long
# table of race/ethnicity percentages for plotting (sfig4).
Demographics_by_BWQS <- bind_rows(Race_Ethncity_all, Race_Ethncity_below25th, Race_Ethncity_btw_25th75th, Race_Ethncity_above_75th) %>%
# Collapse the small "other" and American Indian categories into "Other".
mutate(Other = other_raceethnic + nonhispLat_amerindian_raceethnic) %>%
dplyr::select(-other_raceethnic, -nonhispLat_amerindian_raceethnic, - total_pop1) %>%
rename("Hispanic/Latinx" = "hisplat_raceethnic",
"Black" = "nonhispLat_black_raceethnic",
"White" = "nonhispLat_white_raceethnic",
"Asian" = "nonhispLat_asian_raceethnic") %>%
# Reshape wide -> long. NOTE(review): columns are selected by position
# (1:4 and 6, skipping Group in position 5), which is fragile if the column
# order changes; gather() is also superseded by pivot_longer().
gather(., "Race/Ethnicity", "Proportion", 1:4,6) %>%
# Fix factor level order so bars stack and facets sort consistently.
mutate(`Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c("Other", "Asian", "White", "Hispanic/Latinx", "Black")),
Group = factor(Group, levels = c("NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS")))
# Axis labels for sfig4: wrap each BWQS-percentile group name onto two lines.
# Fix: labels_demographics was assigned twice in a row; the first assignment
# (the "...percentile BWQS" wording) was dead code, immediately overwritten.
# Only the final "%ile index" labels are kept.
labels_demographics <- c("NYC Population" = "NYC Population", "Below 25th percentile BWQS" = "Below 25th\n%ile index",
"Between 25-75th percentile BWQS" = "Between 25-75th\n%ile index", "Above 75th percentile BWQS" = "Above 75th\n%ile index")
# Supplementary Figure 4: stacked bars of race/ethnicity composition for the
# citywide population and for each BWQS percentile group.
sfig4 <- ggplot(Demographics_by_BWQS, aes(fill=`Race/Ethnicity`, y=Proportion, x=Group)) +
# Gray backdrop rectangle behind the "NYC Population" reference bar.
geom_rect(data = subset(Demographics_by_BWQS, Group=="NYC Population"),
aes(xmin=as.numeric(Group)-.35,xmax=as.numeric(Group)+.35, ymin=0, ymax=100, fill="gray85"), color = "gray", alpha = .1) +
geom_bar(data = subset(Demographics_by_BWQS, Group!="NYC Population"), position="stack", stat="identity", width = .75) +
# The citywide reference bar is drawn narrower to set it apart visually.
geom_bar(data = subset(Demographics_by_BWQS, Group=="NYC Population"), position="stack", stat="identity", width = .45) +
# Six values for five breaks: the extra color handles the literal "gray85"
# fill introduced by the geom_rect() backdrop above.
scale_fill_manual(breaks = c("Other", "Asian", "White", "Hispanic/Latinx", "Black"),
values = c("#984ea3","#ff7f00","gray85", "#4daf4a", "#e41a1c", "#377eb8")) +
# Label bar segments of at least 5% with their rounded percentage.
geom_text(aes(label=ifelse(Proportion >= 5, paste0(sprintf("%.0f", Proportion),"%"),"")),
position=position_stack(vjust=0.5), colour="black", size = 8) +
scale_y_continuous("Percentage", expand = c(0,0), labels = function(x) paste0(x, "%")) +
scale_x_discrete(limits = c( "NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS"),
labels = labels_demographics) +
theme_bw(base_size = 16) +
theme(legend.text = element_text(size = 14),
axis.text.y = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black"),
axis.title.x = element_blank())
if(export.figs) ggsave(sfig4, filename = file.path(fig.path, paste0("sfig4","_",Sys.Date(),".png")), device = "png",
dpi = 300, width = 12, height = 6)
# Timing checkpoint: end of the post-model-1 descriptive analyses.
t_postm1 = Sys.time()
#### BWQS by Tract ####
# Aggregate tract-level SES exposure variables up to MODZCTA by taking a
# weighted quantile of each variable across the tracts mapped to each
# MODZCTA (weights = W_RES_RATIO, the tract's share of residential
# addresses).
#
# tract_vars: sf object of tract-level SES variables (geometry is dropped).
# modzcta_tract_crosswalk: crosswalk with TRACT, MODZCTA, and W_RES_RATIO.
# whichq: which quantile to take -- the median ("median"/"q2"/"2q"/"2") or
#   the third quartile ("q3"/"3q"/"3"); anything else is an error.
# Returns one row per MODZCTA; columns are suffixed "_median" or "_3Q".
get_tract_vars_by_zcta <- function(tract_vars, modzcta_tract_crosswalk, whichq){
  whichq = str_to_lower(whichq)
  # BUGFIX(review): the original indexed the default output of
  # Hmisc::wtd.quantile() (probs = 0, .25, .5, .75, 1) at positions 2
  # ("median") and 3 ("3Q"), which actually return the 25th and 50th
  # percentiles. Requesting the desired probability explicitly removes the
  # off-by-one.
  if(whichq %in% c("median", "q2", "2q", "2")){
    qname = "median"
    qprob = 0.5
  } else if(whichq %in% c("q3", "3q", "3")){
    qname = "3Q"
    qprob = 0.75
  } else { stop("unhandled quartile selected: ", whichq)}
  # The ten SES variables fed into the BWQS index, in their original order.
  ses_vars <- c("essentialworker_drove", "essentialworker_pubtrans",
                "not_quarantined_jobs", "didnot_workhome_commute",
                "not_insured", "one_over_medincome", "unemployed",
                "avg_hhold_size", "res_vol_popdensity",
                "one_over_grocers_per_1000")
  tract_vars %>%
    st_set_geometry(., NULL) %>%
    left_join(., modzcta_tract_crosswalk, by = c("GEOID" = "TRACT")) %>%
    # Drop tracts that do not map to any MODZCTA.
    filter(!is.na(MODZCTA)) %>%
    group_by(MODZCTA) %>%
    # One across() call replaces ten hand-copied summarise lines; column
    # names come out as e.g. "essentialworker_drove_median", matching the
    # original rename_with() result.
    summarise(across(all_of(ses_vars),
                     ~ Hmisc::wtd.quantile(.x, W_RES_RATIO, probs = qprob)[[1]],
                     .names = paste0("{.col}_", qname)))
}
# Tract-level medians aggregated to MODZCTA, weighted by each tract's share
# of residential addresses; see get_tract_vars_by_zcta().
SES_zcta_median <- get_tract_vars_by_zcta(tract_vars, modzcta_to_tract2, "median")
# Exposure variable names = every aggregated column except the MODZCTA key.
SES_vars_median <- setdiff(names(SES_zcta_median), "MODZCTA")
# Attach the outcome (positives per 100k) and testing-ratio covariate to the
# aggregated exposures.
SES_zcta_median_testing <- ZCTA_ACS_COVID %>%
  dplyr::select(zcta, pos_per_100000, testing_ratio) %>%
  left_join(SES_zcta_median, by = c("zcta" = "MODZCTA"))
# Refit the BWQS negative-binomial model on the tract-median exposures.
m2data_median <- prep_BWQS_data(SES_zcta_median_testing, SES_vars_median)
m2_median <- fit_BWQS_model(data_list = m2data_median$data_list, stan_model_path = BWQS_stan_model)
## Warning in validityMethod(object): The following variables have undefined
## values: log_lik[1],The following variables have undefined values: log_lik[2],The
## following variables have undefined values: log_lik[3],The following variables
## have undefined values: log_lik[4],The following variables have undefined
## values: log_lik[5],The following variables have undefined values: log_lik[6],The
## following variables have undefined values: log_lik[7],The following variables
## have undefined values: log_lik[8],The following variables have undefined values:
## log_lik[9],The following variables have undefined values: log_lik[10],The
## following variables have undefined values: log_lik[11],The following
## variables have undefined values: log_lik[12],The following variables have
## undefined values: log_lik[13],The following variables have undefined values:
## log_lik[14],The following variables have undefined values: log_lik[15],The
## following variables have undefined values: log_lik[16],The following
## variables have undefined values: log_lik[17],The following variables have
## undefined values: log_lik[18],The following variables have undefined values:
## log_lik[19],The following variables have undefined values: log_lik[20],The
## following variables have undefined values: log_lik[21],The following
## variables have undefined values: log_lik[22],The following variables have
## undefined values: log_lik[23],The following variables have undefined values:
## log_lik[24],The following variables have undefined values: log_lik[25],The
## following variables have undefined values: log_lik[26],The following
## variables have undefined values: log_lik[27],The following variables have
## undefined values: log_lik[28],The following variables have undefined values:
## log_lik[29],The following variables have undefined values: log_lik[30],The
## following variables have undefined values: log_lik[31],The following
## variables have undefined values: log_lik[32],The following variables have
## undefined values: log_lik[33],The following variables have undefined values:
## log_lik[34],The following variables have undefined values: log_lik[35],The
## following variables have undefined values: log_lik[36],The following
## variables have undefined values: log_lik[37],The following variables have
## undefined values: log_lik[38],The following variables have undefined values:
## log_lik[39],The following variables have undefined values: log_lik[40],The
## following variables have undefined values: log_lik[41],The following
## variables have undefined values: log_lik[42],The following variables have
## undefined values: log_lik[43],The following variables have undefined values:
## log_lik[44],The following variables have undefined values: log_lik[45],The
## following variables have undefined values: log_lik[46],The following
## variables have undefined values: log_lik[47],The following variables have
## undefined values: log_lik[48],The following variables have undefined values:
## log_lik[49],The following variables have undefined values: log_lik[50],The
## following variables have undefined values: log_lik[51],The following
## variables have undefined values: log_lik[52],The following variables have
## undefined values: log_lik[53],The following variables have undefined values:
## log_lik[54],The following variables have undefined values: log_lik[55],The
## following variables have undefined values: log_lik[56],The following
## variables have undefined values: log_lik[57],The following variables have
## undefined values: log_lik[58],The following variables have undefined values:
## log_lik[59],The following variables have undefined values: log_lik[60],The
## following variables have undefined values: log_lik[61],The following
## variables have undefined values: log_lik[62],The following variables have
## undefined values: log_lik[63],The following variables have undefined values:
## log_lik[64],The following variables have undefined values: log_lik[65],The
## following variables have undefined values: log_lik[66],The following
## variables have undefined values: log_lik[67],The following variables have
## undefined values: log_lik[68],The following variables have undefined values:
## log_lik[69],The following variables have undefined values: log_lik[70],The
## following variables have undefined values: log_lik[71],The following
## variables have undefined values: log_lik[72],The following variables have
## undefined values: log_lik[73],The following variables have undefined values:
## log_lik[74],The following variables have undefined values: log_lik[75],The
## following variables have undefined values: log_lik[76],The following
## variables have undefined values: log_lik[77],The following variables have
## undefined values: log_lik[78],The following variables have undefined values:
## log_lik[79],The following variables have undefined values: log_lik[80],The
## following variables have undefined values: log_lik[81],The following
## variables have undefined values: log_lik[82],The following variables have
## undefined values: log_lik[83],The following variables have undefined values:
## log_lik[84],The following variables have undefined values: log_lik[85],The
## following variables have undefined values: log_lik[86],The following
## variables have undefined values: log_lik[87],The following variables have
## undefined values: log_lik[88],The following variables have undefined values:
## log_lik[89],The following variables have undefined values: log_lik[90],The
## following variables have undefined values: log_lik[91],The following
## variables have undefined values: log_lik[92],The following variables have
## undefined values: log_lik[93],The following variables have undefined values:
## log_lik[94],The following variables have undefined values: log_lik[95],The
## following variables have undefined values: log_lik[96],The following
## variables have undefined values: log_lik[97],The following variables have
## undefined values: log_lik[98],The following variables have undefined values:
## log_lik[99],The following variables have undefined values: log_lik[100],The
## following variables have undefined values: log_lik[101],The following
## variables have undefined values: log_lik[102],The following variables have
## undefined values: log_lik[103],The following variables have undefined values:
## log_lik[104],The following variables have undefined values: log_lik[105],The
## following variables have undefined values: log_lik[106],The following
## variables have undefined values: log_lik[107],The following variables have
## undefined values: log_lik[108],The following variables have undefined values:
## log_lik[109],The following variables have undefined values: log_lik[110],The
## following variables have undefined values: log_lik[111],The following
## variables have undefined values: log_lik[112],The following variables have
## undefined values: log_lik[113],The following variables have undefined values:
## log_lik[114],The following variables have undefined values: log_lik[115],The
## following variables have undefined values: log_lik[116],The following
## variables have undefined values: log_lik[117],The following variables have
## undefined values: log_lik[118],The following variables have undefined values:
## log_lik[119],The following variables have undefined values: log_lik[120],The
## following variables have undefined values: log_lik[121],The following
## variables have undefined values: log_lik[122],The following variables have
## undefined values: log_lik[123],The following variables have undefined values:
## log_lik[124],The following variables have undefined values: log_lik[125],The
## following variables have undefined values: log_lik[126],The following
## variables have undefined values: log_lik[127],The following variables have
## undefined values: log_lik[128],The following variables have undefined values:
## log_lik[129],The following variables have undefined values: log_lik[130],The
## following variables have undefined values: log_lik[131],The following
## variables have undefined values: log_lik[132],The following variables have
## undefined values: log_lik[133],The following variables have undefined values:
## log_lik[134],The following variables have undefined values: log_lik[135],The
## following variables have undefined values: log_lik[136],Th
# Timing checkpoint: end of the tract-median sensitivity fit.
t_m2 <- Sys.time()
# Tract-level third quartiles (75th percentiles) aggregated to MODZCTA,
# weighted by residential address share; see get_tract_vars_by_zcta().
SES_zcta_3q <- get_tract_vars_by_zcta(tract_vars, modzcta_to_tract2, "3q")
SES_vars_3q <- setdiff(names(SES_zcta_3q), "MODZCTA")
SES_zcta_3q_testing <- ZCTA_ACS_COVID %>%
  dplyr::select(zcta, pos_per_100000, testing_ratio) %>%
  left_join(SES_zcta_3q, by = c("zcta" = "MODZCTA"))
# BWQS sensitivity fit using the third-quartile exposures.
m2data_3q <- prep_BWQS_data(SES_zcta_3q_testing, SES_vars_3q)
m2_3q <- fit_BWQS_model(data_list = m2data_3q$data_list, stan_model_path = BWQS_stan_model)
## Warning in validityMethod(object): The following variables have undefined
## values: log_lik[1],The following variables have undefined values: log_lik[2],The
## following variables have undefined values: log_lik[3],The following variables
## have undefined values: log_lik[4],The following variables have undefined
## values: log_lik[5],The following variables have undefined values: log_lik[6],The
## following variables have undefined values: log_lik[7],The following variables
## have undefined values: log_lik[8],The following variables have undefined values:
## log_lik[9],The following variables have undefined values: log_lik[10],The
## following variables have undefined values: log_lik[11],The following
## variables have undefined values: log_lik[12],The following variables have
## undefined values: log_lik[13],The following variables have undefined values:
## log_lik[14],The following variables have undefined values: log_lik[15],The
## following variables have undefined values: log_lik[16],The following
## variables have undefined values: log_lik[17],The following variables have
## undefined values: log_lik[18],The following variables have undefined values:
## log_lik[19],The following variables have undefined values: log_lik[20],The
## following variables have undefined values: log_lik[21],The following
## variables have undefined values: log_lik[22],The following variables have
## undefined values: log_lik[23],The following variables have undefined values:
## log_lik[24],The following variables have undefined values: log_lik[25],The
## following variables have undefined values: log_lik[26],The following
## variables have undefined values: log_lik[27],The following variables have
## undefined values: log_lik[28],The following variables have undefined values:
## log_lik[29],The following variables have undefined values: log_lik[30],The
## following variables have undefined values: log_lik[31],The following
## variables have undefined values: log_lik[32],The following variables have
## undefined values: log_lik[33],The following variables have undefined values:
## log_lik[34],The following variables have undefined values: log_lik[35],The
## following variables have undefined values: log_lik[36],The following
## variables have undefined values: log_lik[37],The following variables have
## undefined values: log_lik[38],The following variables have undefined values:
## log_lik[39],The following variables have undefined values: log_lik[40],The
## following variables have undefined values: log_lik[41],The following
## variables have undefined values: log_lik[42],The following variables have
## undefined values: log_lik[43],The following variables have undefined values:
## log_lik[44],The following variables have undefined values: log_lik[45],The
## following variables have undefined values: log_lik[46],The following
## variables have undefined values: log_lik[47],The following variables have
## undefined values: log_lik[48],The following variables have undefined values:
## log_lik[49],The following variables have undefined values: log_lik[50],The
## following variables have undefined values: log_lik[51],The following
## variables have undefined values: log_lik[52],The following variables have
## undefined values: log_lik[53],The following variables have undefined values:
## log_lik[54],The following variables have undefined values: log_lik[55],The
## following variables have undefined values: log_lik[56],The following
## variables have undefined values: log_lik[57],The following variables have
## undefined values: log_lik[58],The following variables have undefined values:
## log_lik[59],The following variables have undefined values: log_lik[60],The
## following variables have undefined values: log_lik[61],The following
## variables have undefined values: log_lik[62],The following variables have
## undefined values: log_lik[63],The following variables have undefined values:
## log_lik[64],The following variables have undefined values: log_lik[65],The
## following variables have undefined values: log_lik[66],The following
## variables have undefined values: log_lik[67],The following variables have
## undefined values: log_lik[68],The following variables have undefined values:
## log_lik[69],The following variables have undefined values: log_lik[70],The
## following variables have undefined values: log_lik[71],The following
## variables have undefined values: log_lik[72],The following variables have
## undefined values: log_lik[73],The following variables have undefined values:
## log_lik[74],The following variables have undefined values: log_lik[75],The
## following variables have undefined values: log_lik[76],The following
## variables have undefined values: log_lik[77],The following variables have
## undefined values: log_lik[78],The following variables have undefined values:
## log_lik[79],The following variables have undefined values: log_lik[80],The
## following variables have undefined values: log_lik[81],The following
## variables have undefined values: log_lik[82],The following variables have
## undefined values: log_lik[83],The following variables have undefined values:
## log_lik[84],The following variables have undefined values: log_lik[85],The
## following variables have undefined values: log_lik[86],The following
## variables have undefined values: log_lik[87],The following variables have
## undefined values: log_lik[88],The following variables have undefined values:
## log_lik[89],The following variables have undefined values: log_lik[90],The
## following variables have undefined values: log_lik[91],The following
## variables have undefined values: log_lik[92],The following variables have
## undefined values: log_lik[93],The following variables have undefined values:
## log_lik[94],The following variables have undefined values: log_lik[95],The
## following variables have undefined values: log_lik[96],The following
## variables have undefined values: log_lik[97],The following variables have
## undefined values: log_lik[98],The following variables have undefined values:
## log_lik[99],The following variables have undefined values: log_lik[100],The
## following variables have undefined values: log_lik[101],The following
## variables have undefined values: log_lik[102],The following variables have
## undefined values: log_lik[103],The following variables have undefined values:
## log_lik[104],The following variables have undefined values: log_lik[105],The
## following variables have undefined values: log_lik[106],The following
## variables have undefined values: log_lik[107],The following variables have
## undefined values: log_lik[108],The following variables have undefined values:
## log_lik[109],The following variables have undefined values: log_lik[110],The
## following variables have undefined values: log_lik[111],The following
## variables have undefined values: log_lik[112],The following variables have
## undefined values: log_lik[113],The following variables have undefined values:
## log_lik[114],The following variables have undefined values: log_lik[115],The
## following variables have undefined values: log_lik[116],The following
## variables have undefined values: log_lik[117],The following variables have
## undefined values: log_lik[118],The following variables have undefined values:
## log_lik[119],The following variables have undefined values: log_lik[120],The
## following variables have undefined values: log_lik[121],The following
## variables have undefined values: log_lik[122],The following variables have
## undefined values: log_lik[123],The following variables have undefined values:
## log_lik[124],The following variables have undefined values: log_lik[125],The
## following variables have undefined values: log_lik[126],The following
## variables have undefined values: log_lik[127],The following variables have
## undefined values: log_lik[128],The following variables have undefined values:
## log_lik[129],The following variables have undefined values: log_lik[130],The
## following variables have undefined values: log_lik[131],The following
## variables have undefined values: log_lik[132],The following variables have
## undefined values: log_lik[133],The following variables have undefined values:
## log_lik[134],The following variables have undefined values: log_lik[135],The
## following variables have undefined values: log_lik[136],Th
# Timing checkpoint: end of the tract-third-quartile sensitivity fit.
t_m3 <- Sys.time()
# Summarize one BWQS model fit: WAIC, Bayesian R^2, RMSE of the posterior
# mean predictions against the observed case rate, and the exponentiated
# posterior mean of beta1 (the BWQS index effect) as a rate ratio.
# `df` must contain the observed outcome column pos_per_100000.
Summarize_BWQS_performance <- function(model, df){
  # substitute() captures the caller's expression, so the summary row is
  # labelled with the name the model was passed as (e.g. "m2_median").
  fit_label <- deparse(substitute(model))
  posterior_mean_pred <- colMeans(extract(model, "y_new")$y_new)
  bind_cols(
    model_name = as.character(fit_label),
    waic = extract_waic(model)$waic[[1]],
    bayesr2 = Compute_Bayes_R2(model)$meanr2,
    rmse = sqrt(mean((df$pos_per_100000 - posterior_mean_pred)^2)),
    effect_estimate = exp(mean(extract(model, "beta1")$beta1))
  )
}
# Compare in-sample performance of the three BWQS fits: ZCTA-level exposures
# (m1) vs. tract-level medians (m2_median) vs. tract-level third quartiles
# (m2_3q).
# Fix: removed the trailing comma, which passed an empty argument to
# bind_rows() and only worked because its dots tolerate missing arguments.
bind_rows(Summarize_BWQS_performance(m1, SES_zcta_median_testing),
          Summarize_BWQS_performance(m2_median, SES_zcta_median_testing),
          Summarize_BWQS_performance(m2_3q, SES_zcta_3q_testing))
## # A tibble: 3 x 5
## model_name waic bayesr2 rmse effect_estimate
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 m1 2370. 0.934 184. 1.08
## 2 m2_median 2362. 0.936 185. 1.08
## 3 m2_3q 2359. 0.937 183. 1.08
#### Part 3: Spatial analysis of mortality in relation to BWQS scores ####
# Step 1: Assemble the analysis data frames.
# Per-ZCTA COVID death counts and rates (as of 23 May 2020), de-duplicated
# after the MODZCTA -> ZCTA crosswalk join.
deaths_by23May2020_by_zcta1 <- deaths_by23May2020_by_zcta %>%
  left_join(modzcta_to_zcta, by = c("MODIFIED_ZCTA" = "MODZCTA")) %>%
  mutate(zcta = as.character(MODIFIED_ZCTA)) %>%
  rename(deaths_count = COVID_DEATH_COUNT) %>%
  dplyr::select(zcta, deaths_count, COVID_DEATH_RATE) %>%
  distinct(zcta, .keep_all = TRUE)
# Attach BWQS scores and mortality to the ZCTA geometry, and derive the
# proportion of residents aged 65+.
ZCTA_BWQS_COVID_shp1 <- ZCTA_ACS_COVID_shp %>%
  left_join(ZCTA_BWQS_score, by = "zcta") %>%
  left_join(deaths_by23May2020_by_zcta1, by = "zcta") %>%
  mutate(prop_65plus = age65_plus / total_pop1,
         zcta = as.numeric(zcta))
# Figure 3C - Mortality
# Choropleth of cumulative COVID mortality per 100k by ZCTA, with water
# bodies masked in white and a 5 km scale bar (ggsn::scalebar).
fig3c <- ZCTA_BWQS_COVID_shp1 %>%
ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(aes(fill = COVID_DEATH_RATE), lwd = 0.2)+
scalebar(MODZCTA_NYC_shp1, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5,
anchor = c(x = -73.71, y = 40.51)) +
labs(fill = "Mortality per 100,000") +
ggtitle("Cumulative COVID mortality by zip code (May 23, 2020)") +
# Positional "YlGnBu" binds to brewer_pal()'s `palette` argument (since
# `type` is given by name): a 7-step sequential ColorBrewer ramp.
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
# Clip to the shared NYC plot window used by the other Figure 3 panels.
coord_sf(crs = st_crs(ZCTA_BWQS_COVID_shp),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
theme_smallmaps
# Combine subfigures to make Figure 3: maps of BWQS, Tests, and Mortality by ZCTA
# Add bold panel tags; tag positions are in normalized plot coordinates and
# were tuned by eye per panel.
fig3a_2 = fig3a + labs(tag = "a") + theme(plot.tag.position = c(-0.018, 0.985), plot.tag = element_text(face = "bold", size = 15))
fig3b_2 = fig3b + labs(tag = "b") + theme(plot.tag.position = c(-0.028, 0.93), plot.tag = element_text(face = "bold", size = 15))
fig3c_2 = fig3c + labs(tag = "c") + theme(plot.tag.position = c(-0.028, 0.93), plot.tag = element_text(face = "bold", size = 15))
# Render at 300 dpi via the ragg device: panel (a) full-width on top,
# panels (b) and (c) side by side below.
plotres = 300
agg_png(filename = file.path(fig.path, paste0("fig3_combined_", Sys.Date(), ".png")),
width = plotres*4.25, height = plotres*6, scaling = 2)
grid.arrange(arrangeGrob(fig3a_2, ncol = 1, nrow = 1),
arrangeGrob(fig3b_2, fig3c_2, ncol = 2, nrow = 1),
nrow = 2, ncol = 1, heights = c(1.9,1))
dev.off()
## png
## 2
# Step 2: Run negative binomial model with spatial filtering
# Step 2a: Identify the neighbor relationships
# Convert to an sp object for the spdep/spatialreg workflow below.
spdat.sens <- as_Spatial(ZCTA_BWQS_COVID_shp1)
# k = 6 nearest neighbours by ZCTA coordinates, then made symmetric so that
# neighbourhood is mutual, with row-standardized weights (style = "W").
ny.nb6 <- knearneigh(sp::coordinates(spdat.sens), k=6)
ny.nb6 <- knn2nb(ny.nb6)
ny.nb6 <- make.sym.nb(ny.nb6)
ny.wt6 <- nb2listw(ny.nb6, style="W")
# Step 2b: Fit the model to identify the component of the data with substantial spatial autocorrelation
# Deaths are modelled as a rate via the log-population offset, with the
# BWQS index as the exposure of interest.
fit.nb.ny.sens<-glm.nb(deaths_count~offset(log(total_pop1))+BWQS_index, spdat.sens)
# Moran test on the model residuals: significant positive spatial
# autocorrelation remains (I = 0.125, p < 0.001), motivating the Moran
# eigenvector filtering in Step 2c/2d.
lm.morantest(fit.nb.ny.sens, listw = ny.wt6)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## BWQS_index, data = spdat.sens, init.theta = 6.682813998, link = log)
## weights: ny.wt6
##
## Moran I statistic standard deviate = 3.1743, p-value = 0.000751
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.124763298 -0.001792051 0.001589528
# Moran eigenvector filtering: select eigenvectors of the spatial weights
# that absorb residual autocorrelation (permutation test, alpha = .1,
# nsim = 999), with the NB family fixed at the theta estimated above.
me.fit.sens <- spatialreg::ME(deaths_count~offset(log(total_pop1))+BWQS_index,
spdat.sens@data, family=negative.binomial(fit.nb.ny.sens$theta), listw = ny.wt6, verbose=T, alpha=.1, nsim = 999)
## eV[,16], I: 0.05665305 ZI: NA, pr(ZI): 0.063
## eV[,23], I: 0.01207501 ZI: NA, pr(ZI): 0.342
# Step 2c: Pull out these fits and visualize the autocorrelation
# Two eigenvectors (16 and 23) were selected; map each to inspect the
# spatial pattern it captures.
fits.sens <- data.frame(fitted(me.fit.sens))
spdat.sens$me16 <- fits.sens$vec16
spdat.sens$me23 <- fits.sens$vec23
spplot(spdat.sens, "me16", at=quantile(spdat.sens$me16, p=seq(0,1,length.out = 7)))
spplot(spdat.sens, "me23", at=quantile(spdat.sens$me23, p=seq(0,1,length.out = 7)))
# Step 2d: Include the fits in our regression model as an additional parameter
clean.nb.sens<-glm.nb(deaths_count~offset(log(total_pop1))+BWQS_index+fitted(me.fit.sens), spdat.sens@data)
# Exponentiated coefficients = mortality rate ratios per unit of each term.
tidy(clean.nb.sens) %>% mutate(estimate_exp = exp(estimate))
## # A tibble: 4 x 6
## term estimate std.error statistic p.value estimate_exp
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.29 0.0839 -86.9 0. 0.000679
## 2 BWQS_index 0.181 0.0151 12.0 3.73e-33 1.20
## 3 fitted(me.fit.sens)vec16 -0.925 0.383 -2.41 1.58e- 2 0.397
## 4 fitted(me.fit.sens)vec23 -1.78 0.381 -4.66 3.16e- 6 0.169
# Profile-likelihood 95% CIs, exponentiated to the rate-ratio scale.
as_tibble(confint(clean.nb.sens), rownames = "vars")%>% mutate_at(vars(2:3), .funs = list(~exp(.)))
## Waiting for profiling to be done...
## # A tibble: 4 x 3
## vars `2.5 %` `97.5 %`
## <chr> <dbl> <dbl>
## 1 (Intercept) 0.000575 0.000804
## 2 BWQS_index 1.16 1.23
## 3 fitted(me.fit.sens)vec16 0.188 0.836
## 4 fitted(me.fit.sens)vec23 0.0789 0.361
# Residual Moran test after filtering: autocorrelation is reduced to
# borderline significance (p ~= 0.05).
lm.morantest(clean.nb.sens, resfun = residuals, listw=ny.wt6)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## BWQS_index + fitted(me.fit.sens), data = spdat.sens@data, init.theta =
## 8.00423828, link = log)
## weights: ny.wt6
##
## Moran I statistic standard deviate = 1.643, p-value = 0.05019
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.063550312 -0.002549371 0.001618464
# NOTE(review): `$residuals` on a glm-family object are the *working*
# residuals from the final IWLS iteration (not deviance or Pearson) --
# confirm this is the intended quantity for the residual map below.
spdat.sens$nb_residuals <- clean.nb.sens$residuals
spplot(spdat.sens, "nb_residuals", at=quantile(spdat.sens$nb_residuals, p=seq(0,1,length.out = 10)))
# Supplementary Figure 10: map of the filtered model's residuals.
# Use the middle four colors of a 5-step diverging RdYlBu palette.
pal_sfig10 <- brewer_pal(type = "div", palette = "RdYlBu")(5)[2:5]
pal_sfig10[[2]] <- "#fafadf" # make yellow less saturated
sfig10 <- ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
# Bin the residuals into unit-wide intervals for a discrete fill scale.
geom_sf(data = st_as_sf(spdat.sens), aes(fill = cut_width(nb_residuals, width = 1)), lwd = 0.2) +
scale_fill_discrete(type = pal_sfig10) +
coord_sf(crs = st_crs(MODZCTA_NYC_shp1),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
# NOTE(review): the legend says "Standard Deviations" but the mapped
# values are the glm working residuals assigned above -- confirm the label.
labs(fill = "Residuals\n(Standard Deviations)") +
theme(panel.background = element_rect(fill = "#cccccc"),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.title = element_text(face = "bold", size = 9),
legend.text = element_text(size = 8.5),
plot.margin = unit(c(0.1,0.1,0.1,0.1), "in")
)
if(export.figs) ggsave(sfig10, filename = file.path(fig.path, paste0("sfig10", "_", Sys.Date(),".png")),
dpi = 300, width = 5, height = 3.2)
#### Sensitivity Analyses ####
# half-Cauchy prior for overdispersion parameter
# Refit the main BWQS model with a half-Cauchy (instead of inverse gamma)
# prior on the negative-binomial overdispersion parameter.
BWQS_NB_halfcauchy_stan_model <- here("code", "nb_bwqs_halfcauchy.stan")
m1_halfcauchy <- fit_BWQS_model(m1data$data_list, BWQS_NB_halfcauchy_stan_model)
# beta1 effect estimate using half-cauchy prior for overdispersion
# NOTE(review): row 3 of the stan summary is assumed to be beta1, and
# columns 1/4/8 the posterior mean and 2.5%/97.5% quantiles -- confirm
# against the model's parameter ordering before reusing.
exp(summary(m1_halfcauchy)$summary[3,c(1,4,8)])
## mean 2.5% 97.5%
## 1.077273 1.066034 1.089665
# main model (inverse gamma prior for overdispersion)
exp(summary(m1)$summary[3,c(1,4,8)])
## mean 2.5% 97.5%
## 1.076975 1.064520 1.089281
# no meaningful difference in effect estimate or credible interval!
# BWQS weights -- stability of the rankings
# For each posterior draw, find which mixture weight W[i] is largest, then
# tabulate how often each weight ranks first across draws. Weights that
# never rank first (here W[3], W[6]) do not appear in the output.
as_tibble(extract(m1, c("W[1]","W[2]", "W[3]", "W[4]", "W[5]", "W[6]", "W[7]", "W[8]", "W[9]", "W[10]"))) %>%
rownames_to_column("iteration") %>%
gather(weight, value, "W[1]":"W[10]") %>%
group_by(iteration) %>%
slice(which.max(value)) %>%
ungroup() %>%
group_by(weight) %>%
dplyr::summarise(times_largest_weight = n()) %>%
# NOTE(review): 1750 is assumed to be the total number of post-warmup
# draws; confirm it matches the sampler settings rather than hard-coding.
mutate(pct_largest_weight = round((times_largest_weight/1750)*100, 2))
## # A tibble: 8 x 3
## weight times_largest_weight pct_largest_weight
## <chr> <int> <dbl>
## 1 W[1] 94 5.37
## 2 W[10] 457 26.1
## 3 W[2] 564 32.2
## 4 W[4] 4 0.23
## 5 W[5] 13 0.74
## 6 W[7] 504 28.8
## 7 W[8] 35 2
## 8 W[9] 79 4.51
# using essential workers, testing ratio, and median income
glm_esstl_and_income <- glm.nb(pos_per_100000 ~ not_quarantined_jobs + medincome + testing_ratio, data = ZCTA_ACS_COVID)
broom::tidy(glm_esstl_and_income)
## # A tibble: 4 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 6.34 0.0701 90.4 0.
## 2 not_quarantined_jobs 0.0185 0.00248 7.46 8.96e- 14
## 3 medincome -0.00000250 0.000000359 -6.96 3.36e- 12
## 4 testing_ratio 19.5 0.888 22.0 4.07e-107
# Fitted per-ZCTA case rates; predictions are on the log-link scale, so
# exponentiate to get counts per 100,000.
enframe(predict(glm_esstl_and_income)) %>% mutate(value = exp(value))
## # A tibble: 177 x 2
## name value
## <chr> <dbl>
## 1 1 1293.
## 2 2 1301.
## 3 3 877.
## 4 4 870.
## 5 5 710.
## 6 6 787.
## 7 7 677.
## 8 8 1200.
## 9 9 987.
## 10 10 891.
## # … with 167 more rows
# Just using proportion uninsured and median income as predictors of
# the ZCTA-level case rate (negative-binomial GLM).
glm_insurance_and_income <- glm.nb(pos_per_100000 ~ not_insured + medincome, data = ZCTA_ACS_COVID)
broom::tidy(glm_insurance_and_income)
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 8.14 0.120 68.0 0.
## 2 not_insured -0.00420 0.00857 -0.490 6.24e- 1
## 3 medincome -0.00000737 0.000000891 -8.26 1.41e-16
# PCA of the same social/SES variables that enter the BWQS index, used to
# compare variable rankings and model fit against the BWQS approach.
# Use TRUE rather than T: T is an ordinary binding and can be reassigned.
pca_socialvars <- prcomp(ZCTA_ACS_COVID %>% dplyr::select(all_of(SES_vars)), center = TRUE, scale. = TRUE)
# First-component loadings, sorted descending; renamed to "label" so it can
# be joined against BWQS_fits below.
PC1 <- enframe(pca_socialvars$rotation[,1]) %>% arrange(desc(value)) %>% rename("label" = "name")
# Attach each ZCTA's PC1 score as a column for regression modeling.
df_for_PCA_analysis <- ZCTA_ACS_COVID %>% bind_cols(., PC1 = pca_socialvars$x[,1])
# Compare BWQS ranks to Principal Components ranks for the ten variables.
BWQS_fits %>%
  dplyr::select(mean, label) %>%
  mutate(rank_bwqs = rank(mean)) %>%
  left_join(., PC1, by = "label") %>%
  mutate(rank_pca = rank(value)) %>%
  # Select columns by name rather than position (the original positional
  # select(2, 3, 5, 1, 4) breaks silently if upstream column order changes);
  # order matches the recorded output: label, rank_bwqs, rank_pca, mean, value.
  dplyr::select(label, rank_bwqs, rank_pca, mean, value)
## # A tibble: 10 x 5
## label rank_bwqs rank_pca mean value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 not_insured 10 4 0.177 0.317
## 2 essentialworker_drove 9 2 0.166 0.201
## 3 avg_hhold_size 8 9 0.161 0.382
## 4 res_vol_popdensity 7 3 0.108 0.217
## 5 one_over_medincome 6 8 0.105 0.363
## 6 didnot_workhome_commute 5 7 0.0894 0.359
## 7 not_quarantined_jobs 4 10 0.0618 0.397
## 8 unemployed 3 6 0.0547 0.358
## 9 essentialworker_pubtrans 2 5 0.0498 0.335
## 10 one_over_grocers_per_1000 1 1 0.0267 0.0891
# Negative-binomial GLM using the PC1 score plus a natural spline (3 df)
# for the testing ratio, as a PCA-based alternative to the BWQS index.
glm_princomp <- glm.nb(pos_per_100000 ~ PC1 + ns(testing_ratio, df = 3), data = df_for_PCA_analysis)
summary(glm_princomp)
##
## Call:
## glm.nb(formula = pos_per_100000 ~ PC1 + ns(testing_ratio, df = 3),
## data = df_for_PCA_analysis, init.theta = 94.1393225, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5328 -0.5874 0.0735 0.5595 3.0695
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.635507 0.047025 141.10 <2e-16 ***
## PC1 0.071785 0.004996 14.37 <2e-16 ***
## ns(testing_ratio, df = 3)1 0.980565 0.042292 23.19 <2e-16 ***
## ns(testing_ratio, df = 3)2 2.033267 0.106250 19.14 <2e-16 ***
## ns(testing_ratio, df = 3)3 1.135913 0.053253 21.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(94.1393) family taken to be 1)
##
## Null deviance: 2797.59 on 176 degrees of freedom
## Residual deviance: 179.89 on 172 degrees of freedom
## AIC: 2381
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 94.1
## Std. Err.: 10.7
##
## 2 x log-likelihood: -2368.997
# Profile-likelihood confidence intervals, exponentiated to the rate-ratio
# scale. Reuse the already-fitted glm_princomp instead of refitting the
# identical model specification inline (same result, no duplicate fit).
exp(confint(glm_princomp))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 693.994592 836.504296
## PC1 1.063870 1.085062
## ns(testing_ratio, df = 3)1 2.456486 2.893115
## ns(testing_ratio, df = 3)2 6.178602 9.435865
## ns(testing_ratio, df = 3)3 2.805253 3.461891
# Fitted case rates from the PCA-based model (exponentiated from log link).
enframe(predict(glm_princomp)) %>% mutate(glm_princomp = exp(value)) %>% dplyr::select(glm_princomp)
## # A tibble: 177 x 1
## glm_princomp
## <dbl>
## 1 1333.
## 2 1088.
## 3 727.
## 4 943.
## 5 741.
## 6 819.
## 7 891.
## 8 1065.
## 9 920.
## 10 809.
## # … with 167 more rows
# Fitted case rates from the essential-worker/income model, as a named column.
enframe(predict(glm_esstl_and_income)) %>% mutate(glm_esstl_and_income = exp(value)) %>% dplyr::select(glm_esstl_and_income)
## # A tibble: 177 x 1
## glm_esstl_and_income
## <dbl>
## 1 1293.
## 2 1301.
## 3 877.
## 4 870.
## 5 710.
## 6 787.
## 7 677.
## 8 1200.
## 9 987.
## 10 891.
## # … with 167 more rows
# Assemble observed ZCTA data alongside fitted values from the three
# approaches: the PCA-based GLM, the essential-worker/income GLM, and the
# BWQS model's posterior-mean predictions (mean over y_new draws from m1).
Compare_Metrics <- bind_cols(ZCTA_ACS_COVID,
glm_princomp = enframe(predict(glm_princomp)) %>% mutate(glm_princomp = exp(value)) %>% dplyr::select(glm_princomp),
glm_esstl_and_income = enframe(predict(glm_esstl_and_income)) %>% mutate(glm_esstl_and_income = exp(value)) %>% dplyr::select(glm_esstl_and_income),
BWQS_index = colMeans(extract(m1,"y_new")$y_new))
# Comparison of RMSE and Kendall's tau metrics for three different modeling
# approaches (Supplemental Table 4).
# FIX(review): the original RMSE lambda was sqrt(mean(pos_per_100000 - .x)^2),
# i.e. the absolute mean residual (bias), not RMSE as labeled. Corrected to
# sqrt(mean((pos_per_100000 - .x)^2)); the RMSE row of the previously recorded
# output was produced by the erroneous formula and will change.
bind_rows(Compare_Metrics %>%
            summarise_at(vars(c("BWQS_index", "glm_princomp", "glm_esstl_and_income")),
                         ~cor(pos_per_100000, .x, method = "kendall")) %>%
            mutate(parameter = "kendalls"),
          Compare_Metrics %>%
            summarise_at(vars(c("BWQS_index", "glm_princomp", "glm_esstl_and_income")),
                         ~sqrt(mean((pos_per_100000 - .x)^2))) %>%
            mutate(parameter = "RMSE"))
## # A tibble: 2 x 4
## BWQS_index glm_princomp glm_esstl_and_income parameter
## <dbl> <dbl> <dbl> <chr>
## 1 0.869 0.850 0.823 kendalls
## 2 2.79 2.13 8.10 RMSE
#### Subway analyses ####
# Split UHF neighborhoods into BWQS-index tertile groups for the subway
# analysis: Low (< 25th percentile), Mid (IQR), High (> 75th percentile).
UHF_BWQS_COVID_3split_shp <- UHF_shp %>%
  mutate(uhf = as.character(UHFCODE)) %>%
  left_join(., UHF_BWQS, by = "uhf") %>%
  # Use if_else() throughout (the original mixed base ifelse() with dplyr
  # if_else(); if_else is type-stable and matches the parallel ZCTA block),
  # and TRUE rather than the reassignable shorthand T.
  mutate(Risk = factor(if_else(uhf_weighted_bwqs < quantile(uhf_weighted_bwqs, probs = .25, na.rm = TRUE), "Low",
                               if_else(uhf_weighted_bwqs > quantile(uhf_weighted_bwqs, probs = .75, na.rm = TRUE), "High", "Mid")),
                       levels = c("High", "Mid", "Low")))
# Join UHF-level ridership to the BWQS risk groups (geometry dropped),
# restrict to 2020-01-29 through 2020-04-30, and build a per-place
# sequential time index for the dose-response fits.
Subway_BWQS_3split_df <- Subway_ridership_by_UHF %>%
left_join(., st_set_geometry(UHF_BWQS_COVID_3split_shp, NULL), by = c("place" = "uhf")) %>%
filter(!is.na(place) & date >= "2020-01-29" & date <= "2020-04-30") %>%
group_by(place) %>%
arrange(date) %>%
mutate(time_index = time(date)) %>%
filter(!is.na(Risk) & date != "2020-02-17") %>% #removing Presidents' Day national holiday
anti_join(., service_changes_in_lowsubway_areas, by = c("date", "place")) #removing low outliers due to service changes in low subway density areas
# Four-parameter Weibull (type 2) dose-response curves of relative ridership
# over time, one curve per BWQS risk group. Use FALSE rather than the
# reassignable shorthand F for the separate argument.
fit_drm_risk_3split <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, separate = FALSE)
# F-test: do per-group curves fit better than a single shared curve
# (fit_drm_all, fitted earlier in the script)?
anova(fit_drm_all, fit_drm_risk_3split)
##
## 1st model
## fct: W2.4()
## pmodels: 1 (for all parameters)
## 2nd model
## fct: W2.4()
## pmodels: Risk (for all parameters)
## ANOVA table
##
## ModelDf RSS Df F value p value
## 1st model 3291 21.302
## 2nd model 3283 15.432 8 156.1 0.0
# Parameter estimates for the per-group Weibull curves.
summary(fit_drm_risk_3split)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:Low -12.0668117 0.4809539 -25.089 < 2.2e-16 ***
## b:Mid -11.0653059 0.3525339 -31.388 < 2.2e-16 ***
## b:High -10.8697531 0.3999555 -27.177 < 2.2e-16 ***
## c:Low 0.0728584 0.0038983 18.690 < 2.2e-16 ***
## c:Mid 0.1294325 0.0033939 38.137 < 2.2e-16 ***
## c:High 0.1751494 0.0040700 43.034 < 2.2e-16 ***
## d:Low 0.9805997 0.0035709 274.612 < 2.2e-16 ***
## d:Mid 0.9598153 0.0028993 331.053 < 2.2e-16 ***
## [ reached getOption("max.print") -- omitted 4 rows ]
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.06856061 (3283 degrees of freedom)
# Pairwise differences in the Weibull steepness parameter b across risk groups.
compParm(fit_drm_risk_3split, "b", operator = "-")
##
## Comparison of parameter 'b'
##
## Estimate Std. Error t-value p-value
## Low-Mid -1.00151 0.59632 -1.6795 0.09315 .
## Low-High -1.19706 0.62552 -1.9137 0.05575 .
## Mid-High -0.19555 0.53315 -0.3668 0.71380
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise differences in the lower-asymptote parameter c (floor of ridership).
compParm(fit_drm_risk_3split, "c", operator = "-")
##
## Comparison of parameter 'c'
##
## Estimate Std. Error t-value p-value
## Low-Mid -0.0565742 0.0051687 -10.9456 < 2.2e-16 ***
## Low-High -0.1022910 0.0056357 -18.1504 < 2.2e-16 ***
## Mid-High -0.0457168 0.0052994 -8.6268 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Quick base-graphics check of the fitted curves.
plot(fit_drm_risk_3split)
# Refit each risk group separately (subset =) so per-group confidence
# intervals and predictions can be extracted below.
fit_drm_risk_3split_high <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="High")
fit_drm_risk_3split_mid <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="Mid")
fit_drm_risk_3split_low <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="Low")
summary(fit_drm_risk_3split_high)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -10.8693720 0.3278018 -33.158 < 2.2e-16 ***
## c:(Intercept) 0.1751430 0.0033359 52.502 < 2.2e-16 ***
## d:(Intercept) 0.9758825 0.0027014 361.256 < 2.2e-16 ***
## e:(Intercept) 46.8619040 0.1050027 446.292 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.05619323 (1008 degrees of freedom)
# Mid-risk group fit.
summary(fit_drm_risk_3split_mid)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.0658766 0.3964936 -27.909 < 2.2e-16 ***
## c:(Intercept) 0.1294362 0.0038167 33.913 < 2.2e-16 ***
## d:(Intercept) 0.9598145 0.0032605 294.376 < 2.2e-16 ***
## e:(Intercept) 45.9159522 0.1187447 386.678 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.07710264 (1359 degrees of freedom)
# Low-risk group fit.
summary(fit_drm_risk_3split_low)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -12.0672287 0.4733213 -25.495 < 2.2e-16 ***
## c:(Intercept) 0.0728676 0.0038363 18.994 < 2.2e-16 ***
## d:(Intercept) 0.9805959 0.0035141 279.045 < 2.2e-16 ***
## e:(Intercept) 44.2637382 0.1136008 389.643 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.06747137 (916 degrees of freedom)
# 95% CIs for the high-risk group parameters.
confint(fit_drm_risk_3split_high)
## 2.5 % 97.5 %
## b:(Intercept) -11.5126240 -10.2261199
## c:(Intercept) 0.1685968 0.1816891
## d:(Intercept) 0.9705815 0.9811834
## e:(Intercept) 46.6558551 47.0679529
# 95% CIs for the mid-risk group parameters.
confint(fit_drm_risk_3split_mid)
## 2.5 % 97.5 %
## b:(Intercept) -11.8436826 -10.2880707
## c:(Intercept) 0.1219489 0.1369234
## d:(Intercept) 0.9534184 0.9662107
## e:(Intercept) 45.6830095 46.1488949
# 95% CIs for the low-risk group parameters.
confint(fit_drm_risk_3split_low)
## 2.5 % 97.5 %
## b:(Intercept) -12.99614880 -11.13830857
## c:(Intercept) 0.06533873 0.08039652
## d:(Intercept) 0.97369930 0.98749260
## e:(Intercept) 44.04079005 44.48668636
# b is not actually a slope - but a scaling factor that needs to be
# transformed into the slope of maximum decline: slope = -b * (-d / (4 * e)).
slopes_3split <- as_tibble(coef(fit_drm_risk_3split_high), rownames = "vars") %>%
  bind_cols(BWQS_risk = "high") %>%
  bind_rows(as_tibble(coef(fit_drm_risk_3split_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
  bind_rows(as_tibble(coef(fit_drm_risk_3split_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
  mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
  # pivot_wider() replaces the superseded spread(); one row per risk group
  # with columns b, c, d, e. Downstream use joins on BWQS_risk, so values
  # are unaffected by row-order differences between the two verbs.
  pivot_wider(names_from = vars, values_from = value) %>%
  mutate(slope = -b*(-d/(4*e)))
# Apply the same b -> slope transform to the confidence bounds of b
# (holding d and e at their point estimates from slopes_3split) to get
# approximate CIs for the maximum-decline slope per risk group.
as_tibble(confint(fit_drm_risk_3split_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
filter(vars == "b") %>%
right_join(., slopes_3split, by = "BWQS_risk") %>%
mutate(slope_lower_ci = -`2.5 %`*(-d/(4*e)),
slope_higher_ci = -`97.5 %`*(-d/(4*e))) %>%
dplyr::select(BWQS_risk, slope, slope_lower_ci, slope_higher_ci)
## # A tibble: 3 x 4
## BWQS_risk slope slope_lower_ci slope_higher_ci
## <chr> <dbl> <dbl> <dbl>
## 1 high -0.0566 -0.0599 -0.0532
## 2 mid -0.0578 -0.0619 -0.0538
## 3 low -0.0668 -0.0720 -0.0617
# Per-group fitted curves with confidence bands; warnings are routed
# through `handler` (defined earlier in the script) via
# withCallingHandlers. NOTE(review): bind_cols relies on each prediction
# table being row-aligned with the group's data, which holds because
# Subway_BWQS_3split_df1 below re-sorts by Risk then date -- verify if
# the upstream ordering changes.
fit_drm_predictions_3split <- as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_high, interval = "confidence"), warning = handler) %>%
bind_cols(., Risk = "High")) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_mid, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Mid"))) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_low, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Low")))
Subway_BWQS_3split_df1 <- bind_cols(Subway_BWQS_3split_df %>% arrange(Risk, date),
fit_drm_predictions_3split %>% dplyr::select(-Risk))
# Relabel the risk groups with percentile descriptions for the figure legend
# and drop pre-2020-02-17 dates for display purposes only.
Subway_BWQS_3split_df2 <- Subway_BWQS_3split_df1 %>%
filter(date>"2020-02-16") %>% # subsetting for visualization
mutate(Risk = factor(if_else(Risk == "High", "High (≥ 75%ile)",
if_else(Risk=="Mid", "Mid (IQR)", "Low (≤ 25%ile)")),
levels = c("High (≥ 75%ile)", "Mid (IQR)", "Low (≤ 25%ile)")))
# Supplementary Figure 7: observed relative subway ridership (jittered
# points) and fitted Weibull curves with confidence ribbons, by BWQS risk
# group at the UHF level. Dashed line marks 2020-03-22 (PAUSE order).
sfig7 <- ggplot() +
geom_jitter(data = Subway_BWQS_3split_df2, aes(x = date, y = usage.median.ratio, color = as.factor(Risk)), alpha = .5, position = position_jitter(height = 0, width = 0.4))+
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "High (≥ 75%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "Mid (IQR)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "Low (≤ 25%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_line(data = Subway_BWQS_3split_df2, aes(x = date, y = Prediction, color = as.factor(Risk))) +
scale_x_date("Date", date_minor_breaks = "1 week") +
scale_y_continuous("Relative Subway Ridership (%)", labels = scales::percent) +
geom_vline(xintercept = date("2020-03-22"),color = "grey30", lty = 2) +
theme_bw(base_size = 16) +
labs(colour="COVID-19 inequity index") +
theme(legend.title = element_text(face = "bold", size = 12), legend.position = c(0.8, 0.7))
# FIX(review): the original file-name stem was "sfig7 " (trailing space),
# which produced "sfig7 _<date>.png"; dropped the stray space so the file
# name matches the sfig8/sfig10 convention.
if(export.figs) ggsave(sfig7, filename = file.path(fig.path, paste0("sfig7", "_", Sys.Date(),".png")),
dpi = 300, width = 8, height = 6)
# ZCTA level BWQS for subway: recompute relative ridership at ZCTA
# resolution (function from the MTA.turnstile package).
Subway_ridership_by_ZCTA <- relative.subway.usage(2020, by = "zcta")
# There are clearly some higher outliers that need to be removed at the
# zcta level: April weekend dates in ZCTA 10469 (Bronx) flagged below.
# Repeat the single ZCTA with rep(), sized from the date vector, instead of
# writing the literal eight times.
high_april_bronx_subway <- tibble(date = as.Date(c("2020-04-04", "2020-04-05", "2020-04-11", "2020-04-12", "2020-04-18", "2020-04-19", "2020-04-25", "2020-04-26")),
place = rep("10469", length(date)))
# Join ZCTA ridership to BWQS scores, restrict to the study window, build a
# per-place time index, drop the holiday and flagged outlier days, and split
# ZCTAs into the same Low/Mid/High BWQS groups used at the UHF level.
Subway_BWQS_ZCTA_df <- Subway_ridership_by_ZCTA %>%
left_join(., ZCTA_BWQS_score, by = c("place" = "zcta")) %>%
filter(!is.na(place) & date >= "2020-01-29" & date <= "2020-04-30") %>%
group_by(place) %>%
arrange(date) %>%
mutate(time_index = time(date)) %>%
ungroup() %>%
filter(!is.na(BWQS_index) & date != "2020-02-17") %>% #removing Presidents' Day national holiday
anti_join(., high_april_bronx_subway, by = c("date", "place")) %>% #removing high outlier days in ZCTA 10469 flagged above
mutate(Risk = factor(if_else(BWQS_index < quantile(BWQS_index, probs = .25), "Low",
if_else(BWQS_index > quantile(BWQS_index, probs = .75), "High", "Mid")),
levels = c("High", "Mid", "Low")))
# Fit the Weibull (type 2) curve separately for each ZCTA-level risk group.
fit_drm_risk_3split_zcta_high <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="High")
fit_drm_risk_3split_zcta_mid <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="Mid")
fit_drm_risk_3split_zcta_low <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="Low")
summary(fit_drm_risk_3split_zcta_high)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -10.9137233 0.3404024 -32.061 < 2.2e-16 ***
## c:(Intercept) 0.1686448 0.0034263 49.221 < 2.2e-16 ***
## d:(Intercept) 0.9566853 0.0027549 347.266 < 2.2e-16 ***
## e:(Intercept) 46.9474325 0.1081606 434.053 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.09486494 (2748 degrees of freedom)
# Mid-risk ZCTA group fit.
summary(fit_drm_risk_3split_zcta_mid)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.3113739 0.3758877 -30.092 < 2.2e-16 ***
## c:(Intercept) 0.1340174 0.0035621 37.623 < 2.2e-16 ***
## d:(Intercept) 0.9817241 0.0030165 325.456 < 2.2e-16 ***
## e:(Intercept) 45.9104803 0.1072824 427.940 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.1474759 (5700 degrees of freedom)
# Low-risk ZCTA group fit.
summary(fit_drm_risk_3split_zcta_low)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.8046717 0.3455007 -34.167 < 2.2e-16 ***
## c:(Intercept) 0.0619258 0.0028874 21.447 < 2.2e-16 ***
## d:(Intercept) 0.9886886 0.0026770 369.327 < 2.2e-16 ***
## e:(Intercept) 43.9322297 0.0871923 503.854 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.08812689 (2756 degrees of freedom)
# b is not actually a slope - but a scaling factor that needs to be
# transformed into the slope of maximum decline: slope = -b * (-d / (4 * e)).
# Same construction as slopes_3split, at the ZCTA level.
slopes_zcta <- as_tibble(coef(fit_drm_risk_3split_zcta_high), rownames = "vars") %>%
  bind_cols(BWQS_risk = "high") %>%
  bind_rows(as_tibble(coef(fit_drm_risk_3split_zcta_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
  bind_rows(as_tibble(coef(fit_drm_risk_3split_zcta_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
  mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
  # pivot_wider() replaces the superseded spread(); downstream use joins on
  # BWQS_risk, so values are unaffected by row-order differences.
  pivot_wider(names_from = vars, values_from = value) %>%
  mutate(slope = -b*(-d/(4*e)))
# Transform the confidence bounds of b into approximate slope CIs per group,
# holding d and e at their point estimates (as in the UHF-level table).
as_tibble(confint(fit_drm_risk_3split_zcta_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_zcta_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_zcta_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
filter(vars == "b") %>%
right_join(., slopes_zcta, by = "BWQS_risk") %>%
mutate(slope_lower_ci = -`2.5 %`*(-d/(4*e)),
slope_higher_ci = -`97.5 %`*(-d/(4*e))) %>%
dplyr::select(BWQS_risk, slope, slope_lower_ci, slope_higher_ci)
## # A tibble: 3 x 4
## BWQS_risk slope slope_lower_ci slope_higher_ci
## <chr> <dbl> <dbl> <dbl>
## 1 high -0.0556 -0.0590 -0.0522
## 2 mid -0.0605 -0.0644 -0.0565
## 3 low -0.0664 -0.0702 -0.0626
# Per-group fitted curves with confidence bands at the ZCTA level; warnings
# routed through `handler` (defined earlier). NOTE(review): as above,
# bind_cols depends on row alignment with arrange(Risk, date) -- verify if
# upstream ordering changes.
fit_drm_predictions_zcta <- as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_high, interval = "confidence"), warning = handler) %>%
bind_cols(., Risk = "High")) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_mid, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Mid"))) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_low, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Low"))) # %>% mutate(time_index = row_number())
Subway_BWQS_ZCTA_df1 <- bind_cols(Subway_BWQS_ZCTA_df %>% arrange(Risk, date),
fit_drm_predictions_zcta %>% dplyr::select(-Risk))
# Relabel risk groups with percentile descriptions for the figure legend;
# drop pre-2020-02-17 dates for display only.
Subway_BWQS_ZCTA_df2 <- Subway_BWQS_ZCTA_df1 %>%
filter(date>"2020-02-16") %>% # subsetting for visualization
mutate(Risk = factor(if_else(Risk == "High", "High (≥ 75%ile)",
if_else(Risk=="Mid", "Mid (IQR)", "Low (≤ 25%ile)")),
levels = c("High (≥ 75%ile)", "Mid (IQR)", "Low (≤ 25%ile)")))
# Supplementary Figure 8: ZCTA-level analogue of Supplementary Figure 7 --
# observed relative ridership and fitted Weibull curves with confidence
# ribbons by BWQS risk group. Dashed line marks 2020-03-22.
sfig8 <- ggplot() +
geom_jitter(data = Subway_BWQS_ZCTA_df2, aes(x = date, y = usage.median.ratio, color = Risk), size = 0.3, alpha = 0.3, position = position_jitter(height = 0, width = 0.42))+
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "High (≥ 75%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "Mid (IQR)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "Low (≤ 25%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_line(data = Subway_BWQS_ZCTA_df2, aes(x = date, y = Prediction, color = Risk)) +
scale_x_date("Date", date_minor_breaks = "1 week") +
scale_y_continuous("Relative Subway Ridership (%)", labels = scales::percent) +
geom_vline(xintercept = date("2020-03-22"),color = "grey30", lty = 2) +
theme_bw(base_size = 16) +
labs(colour="COVID-19 inequity index") +
theme(legend.title = element_text(face = "bold", size = 12), legend.position = c(0.8, 0.7))
if(export.figs) ggsave(sfig8 , filename = file.path(fig.path, paste0("sfig8", "_", Sys.Date(),".png")),
dpi = 300, width = 8, height = 6)
# Supplementary Figure 9: compare MTA turnstile data to Google mobility
# reports. The sourced script is expected to define `mobplot`, which is
# printed here.
source(here("code/mta_vs_google.R"), echo = FALSE)
mobplot
#### Appendix ####
# Runtime accounting: print the timestamps captured at each stage of the
# script (t_start, t_turnstile_*, t_census, etc. are set earlier) and the
# elapsed time for each phase.
t_final = Sys.time()
print(t_start)
## [1] "2020-11-24 19:37:44 EST"
print(t_turnstile_1)
## [1] "2020-11-24 19:37:49 EST"
print(t_turnstile_2)
## [1] "2020-11-24 19:52:05 EST"
print(t_census)
## [1] "2020-11-24 19:57:07 EST"
print(t_dataprep)
## [1] "2020-11-24 20:05:43 EST"
print(t_m1)
## [1] "2020-11-24 20:13:43 EST"
print(t_postm1)
## [1] "2020-11-24 20:13:52 EST"
print(t_m2)
## [1] "2020-11-24 20:20:06 EST"
print(t_m3)
## [1] "2020-11-24 20:24:50 EST"
print(t_final)
## [1] "2020-11-24 20:32:08 EST"
# Total runtime:
print(t_final - t_start)
## Time difference of 54.40026 mins
# Time part 1: Script start through loading of MTA Turnstile package:
print(t_turnstile_1 - t_start)
## Time difference of 5.607934 secs
# Time part 2: Downloading (if this is the first run) and processing of subway turnstile data by neighborhood:
print(t_turnstile_2 - t_turnstile_1)
## Time difference of 14.25564 mins
# Time part 3: Downloading of all other data and processing of Census data:
print(t_census - t_turnstile_2)
## Time difference of 5.045368 mins
# Time part 4: All other data processing, modeling, and figure generation:
print(t_final - t_census)
## Time difference of 35.00579 mins
# Record package versions and platform details for reproducibility.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.2 (2020-06-22)
## os Ubuntu 18.04.4 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-11-24
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## abind * 1.4-5 2016-07-21 [2] CRAN (R 4.0.2)
## acepack 1.4.1 2016-10-29 [2] CRAN (R 4.0.2)
## askpass 1.1 2019-01-13 [2] CRAN (R 4.0.2)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.0.2)
## backports 1.1.8 2020-06-17 [2] CRAN (R 4.0.2)
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 4.0.2)
## BBmisc 1.11 2017-03-10 [2] CRAN (R 4.0.2)
## bit 1.1-15.2 2020-02-10 [2] CRAN (R 4.0.2)
## bit64 0.9-7 2017-05-08 [2] CRAN (R 4.0.2)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 4.0.2)
## blob 1.2.1 2020-01-20 [2] CRAN (R 4.0.2)
## boot 1.3-25 2020-04-26 [3] CRAN (R 4.0.0)
## broom * 0.5.6 2020-04-20 [2] CRAN (R 4.0.2)
## callr 3.4.3 2020-03-28 [2] CRAN (R 4.0.2)
## car 3.0-8 2020-05-21 [2] CRAN (R 4.0.2)
## carData 3.0-4 2020-05-22 [2] CRAN (R 4.0.2)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.0.2)
## checkmate 2.0.0 2020-02-06 [2] CRAN (R 4.0.2)
## class 7.3-17 2020-04-26 [3] CRAN (R 4.0.0)
## classInt 0.4-3 2020-04-07 [2] CRAN (R 4.0.2)
## cli 2.0.2 2020-02-28 [2] CRAN (R 4.0.2)
## cluster 2.1.0 2019-06-19 [3] CRAN (R 4.0.0)
## coda 0.19-3 2019-07-05 [2] CRAN (R 4.0.2)
## codetools 0.2-16 2018-12-24 [3] CRAN (R 4.0.0)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 4.0.2)
## corrplot * 0.84 2017-10-16 [2] CRAN (R 4.0.2)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 4.0.2)
## curl 4.3 2019-12-02 [2] CRAN (R 4.0.2)
## data.table * 1.13.0 2020-07-24 [1] CRAN (R 4.0.2)
## DBI 1.1.0 2019-12-15 [2] CRAN (R 4.0.2)
## dbplyr 1.4.4 2020-05-27 [2] CRAN (R 4.0.2)
## deldir 0.1-25 2020-02-03 [2] CRAN (R 4.0.2)
## DHARMa * 0.3.3.0 2020-09-08 [2] CRAN (R 4.0.2)
## digest 0.6.25 2020-02-23 [2] CRAN (R 4.0.2)
## dplyr * 1.0.0 2020-05-29 [2] CRAN (R 4.0.2)
## drc * 3.0-1 2016-08-30 [1] CRAN (R 4.0.2)
## e1071 1.7-3 2019-11-26 [2] CRAN (R 4.0.2)
## egg * 0.4.5 2019-07-13 [2] CRAN (R 4.0.2)
## ellipsis 0.3.1 2020-05-15 [2] CRAN (R 4.0.2)
## evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.2)
## expm 0.999-4 2019-03-21 [2] CRAN (R 4.0.2)
## fansi 0.4.1 2020-01-08 [2] CRAN (R 4.0.2)
## farver 2.0.3 2020-01-16 [2] CRAN (R 4.0.2)
## fastmap 1.0.1 2019-10-08 [2] CRAN (R 4.0.2)
## fastmatch 1.1-0 2017-01-28 [2] CRAN (R 4.0.2)
## forcats * 0.5.0 2020-03-01 [2] CRAN (R 4.0.2)
## foreach 1.5.0 2020-03-30 [2] CRAN (R 4.0.2)
## foreign 0.8-79 2020-04-26 [3] CRAN (R 4.0.0)
## Formula 1.2-3 2018-05-03 [2] CRAN (R 4.0.2)
## fs 1.4.2 2020-06-30 [2] CRAN (R 4.0.2)
## fst * 0.9.2 2020-04-01 [2] CRAN (R 4.0.2)
## gap 1.2.2 2020-02-02 [2] CRAN (R 4.0.2)
## gdata 2.18.0 2017-06-06 [2] CRAN (R 4.0.2)
## generics 0.0.2 2018-11-29 [2] CRAN (R 4.0.2)
## ggExtra * 0.9 2019-08-27 [2] CRAN (R 4.0.2)
## ggmap 3.0.0 2019-02-05 [2] CRAN (R 4.0.2)
## ggplot2 * 3.3.2 2020-06-19 [2] CRAN (R 4.0.2)
## ggpubr * 0.4.0 2020-06-27 [1] CRAN (R 4.0.2)
## ggridges * 0.5.2 2020-01-12 [2] CRAN (R 4.0.2)
## ggsignif 0.6.0 2019-08-08 [1] CRAN (R 4.0.2)
## ggsn * 0.5.0 2019-02-18 [2] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gmodels 2.18.1 2018-06-25 [2] CRAN (R 4.0.2)
## gridExtra * 2.3 2017-09-09 [2] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 4.0.2)
## gtools 3.8.2 2020-03-31 [2] CRAN (R 4.0.2)
## haven 2.3.1 2020-06-01 [2] CRAN (R 4.0.2)
## here * 0.1 2017-05-28 [2] CRAN (R 4.0.2)
## highr 0.8 2019-03-20 [2] CRAN (R 4.0.2)
## Hmisc 4.4-0 2020-03-23 [2] CRAN (R 4.0.2)
## hms 0.5.3 2020-01-08 [2] CRAN (R 4.0.2)
## htmlTable 2.0.0 2020-06-21 [2] CRAN (R 4.0.2)
## htmltools 0.5.0 2020-06-16 [2] CRAN (R 4.0.2)
## htmlwidgets 1.5.1 2019-10-08 [2] CRAN (R 4.0.2)
## httpuv 1.5.4 2020-06-06 [2] CRAN (R 4.0.2)
## httr * 1.4.1 2019-08-05 [2] CRAN (R 4.0.2)
## inline 0.3.15 2018-05-18 [2] CRAN (R 4.0.2)
## iterators 1.0.12 2019-07-26 [2] CRAN (R 4.0.2)
## jpeg 0.1-8.1 2019-10-24 [2] CRAN (R 4.0.2)
## jsonlite * 1.7.0 2020-06-25 [2] CRAN (R 4.0.2)
## Just.universal * 0.0.0.9000 2020-11-24 [1] Github (justlab/Just_universal@4e6147a)
## kableExtra * 1.3.1 2020-10-22 [1] CRAN (R 4.0.2)
## KernSmooth 2.23-17 2020-04-26 [3] CRAN (R 4.0.0)
## knitr 1.29 2020-06-23 [2] CRAN (R 4.0.2)
## labeling 0.3 2014-08-23 [2] CRAN (R 4.0.2)
## later 1.1.0.1 2020-06-05 [2] CRAN (R 4.0.2)
## lattice 0.20-41 2020-04-02 [3] CRAN (R 4.0.0)
## latticeExtra 0.6-29 2019-12-19 [2] CRAN (R 4.0.2)
## LearnBayes 2.15.1 2018-03-18 [2] CRAN (R 4.0.2)
## lifecycle 0.2.0 2020-03-06 [2] CRAN (R 4.0.2)
## lme4 1.1-23 2020-04-07 [2] CRAN (R 4.0.2)
## loo 2.2.0 2019-12-19 [2] CRAN (R 4.0.2)
## lubridate * 1.7.9 2020-06-08 [2] CRAN (R 4.0.2)
## lwgeom * 0.2-5 2020-06-12 [2] CRAN (R 4.0.2)
## magic * 1.5-9 2018-09-17 [2] CRAN (R 4.0.2)
## magrittr 1.5 2014-11-22 [2] CRAN (R 4.0.2)
## maptools 1.0-1 2020-05-14 [2] CRAN (R 4.0.2)
## MASS * 7.3-51.6 2020-04-26 [3] CRAN (R 4.0.0)
## Matrix * 1.2-18 2019-11-27 [3] CRAN (R 4.0.0)
## matrixStats * 0.56.0 2020-03-13 [2] CRAN (R 4.0.2)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 4.0.2)
## mgcv 1.8-31 2019-11-09 [3] CRAN (R 4.0.0)
## mime 0.9 2020-02-04 [2] CRAN (R 4.0.2)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.0.2)
## minqa 1.2.4 2014-10-09 [2] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [2] CRAN (R 4.0.2)
## MTA.turnstile * 0.0.0.9001 2020-11-24 [1] Github (justlab/MTA_turnstile@544eba3)
## multcomp 1.4-13 2020-04-08 [2] CRAN (R 4.0.2)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.0.2)
## mvtnorm 1.1-1 2020-06-09 [2] CRAN (R 4.0.2)
## nlme 3.1-147 2020-04-13 [3] CRAN (R 4.0.0)
## nloptr 1.2.2.1 2020-03-11 [2] CRAN (R 4.0.2)
## nnet 7.3-14 2020-04-26 [3] CRAN (R 4.0.0)
## openxlsx 4.1.5 2020-05-06 [2] CRAN (R 4.0.2)
## ParamHelpers 1.14 2020-03-24 [2] CRAN (R 4.0.2)
## pbapply 1.4-3 2020-08-18 [1] CRAN (R 4.0.2)
## pdftools * 2.3.1 2020-05-22 [1] CRAN (R 4.0.2)
## pillar 1.4.4 2020-05-05 [2] CRAN (R 4.0.2)
## pkgbuild 1.0.8 2020-05-07 [2] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.2)
## plotrix 3.7-8 2020-04-16 [2] CRAN (R 4.0.2)
## plyr 1.8.6 2020-03-03 [2] CRAN (R 4.0.2)
## png 0.1-7 2013-12-03 [2] CRAN (R 4.0.2)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.0.2)
## processx 3.4.2 2020-02-09 [2] CRAN (R 4.0.2)
## promises 1.1.1 2020-06-09 [2] CRAN (R 4.0.2)
## ps 1.3.3 2020-05-08 [2] CRAN (R 4.0.2)
## purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.0.2)
## qpdf 1.1 2019-03-07 [1] CRAN (R 4.0.2)
## qs * 0.22.1 2020-06-10 [2] CRAN (R 4.0.2)
## R6 2.4.1 2019-11-12 [2] CRAN (R 4.0.2)
## ragg * 0.4.0 2020-10-05 [1] CRAN (R 4.0.2)
## RANN 2.6.1 2019-01-08 [2] CRAN (R 4.0.2)
## RApiSerialize 0.1.0 2014-04-19 [2] CRAN (R 4.0.2)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 4.0.2)
## raster 3.3-7 2020-06-27 [2] CRAN (R 4.0.2)
## RColorBrewer 1.1-2 2014-12-07 [2] CRAN (R 4.0.2)
## Rcpp 1.0.4.6 2020-04-09 [2] CRAN (R 4.0.2)
## RcppParallel 5.0.2 2020-06-24 [2] CRAN (R 4.0.2)
## readr * 1.3.1 2018-12-21 [2] CRAN (R 4.0.2)
## readxl * 1.3.1 2019-03-13 [2] CRAN (R 4.0.2)
## reprex 0.3.0 2019-05-16 [2] CRAN (R 4.0.2)
## rgdal 1.5-12 2020-06-26 [2] CRAN (R 4.0.2)
## RgoogleMaps 1.4.5.3 2020-02-12 [2] CRAN (R 4.0.2)
## rio 0.5.16 2018-11-26 [2] CRAN (R 4.0.2)
## rjson 0.2.20 2018-06-08 [2] CRAN (R 4.0.2)
## rlang 0.4.6 2020-05-02 [2] CRAN (R 4.0.2)
## rmarkdown 2.3 2020-06-18 [2] CRAN (R 4.0.2)
## rpart 4.1-15 2019-04-12 [3] CRAN (R 4.0.0)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 4.0.2)
## RSQLite 2.2.1 2020-09-30 [2] CRAN (R 4.0.2)
## rstan * 2.19.3 2020-02-11 [2] CRAN (R 4.0.2)
## rstatix 0.6.0 2020-06-18 [1] CRAN (R 4.0.2)
## rstudioapi 0.11 2020-02-07 [2] CRAN (R 4.0.2)
## rvest 0.3.5 2019-11-08 [2] CRAN (R 4.0.2)
## sandwich 2.5-1 2019-04-06 [2] CRAN (R 4.0.2)
## scales * 1.1.1 2020-05-11 [2] CRAN (R 4.0.2)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.2)
## sf * 0.9-6 2020-09-13 [2] CRAN (R 4.0.2)
## shiny 1.5.0 2020-06-23 [2] CRAN (R 4.0.2)
## sp * 1.4-2 2020-05-20 [2] CRAN (R 4.0.2)
## spatialreg * 1.1-5 2019-12-01 [1] CRAN (R 4.0.2)
## spData * 0.3.5 2020-04-06 [2] CRAN (R 4.0.2)
## spdep * 1.1-5 2020-06-29 [2] CRAN (R 4.0.2)
## StanHeaders * 2.21.0-6 2020-08-16 [1] CRAN (R 4.0.2)
## statmod 1.4.34 2020-02-17 [2] CRAN (R 4.0.2)
## stringfish 0.12.1 2020-06-05 [2] CRAN (R 4.0.2)
## stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [2] CRAN (R 4.0.2)
## survival 3.1-12 2020-04-10 [3] CRAN (R 4.0.0)
## systemfonts 0.3.2 2020-09-29 [1] CRAN (R 4.0.2)
## textshaping 0.1.2 2020-10-08 [1] CRAN (R 4.0.2)
## TH.data 1.0-10 2019-01-21 [2] CRAN (R 4.0.2)
## tibble * 3.0.1 2020-04-20 [2] CRAN (R 4.0.2)
## tidycensus * 0.10.2 2020-09-28 [1] CRAN (R 4.0.2)
## tidyr * 1.1.0 2020-05-20 [2] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [2] CRAN (R 4.0.2)
## tidyverse * 1.3.0 2019-11-21 [2] CRAN (R 4.0.2)
## tigris 0.9.4 2020-04-01 [2] CRAN (R 4.0.2)
## units 0.6-7 2020-06-13 [2] CRAN (R 4.0.2)
## utf8 1.1.4 2018-05-24 [2] CRAN (R 4.0.2)
## uuid 0.1-4 2020-02-26 [2] CRAN (R 4.0.2)
## vctrs 0.3.1 2020-06-05 [2] CRAN (R 4.0.2)
## viridisLite 0.3.0 2018-02-01 [2] CRAN (R 4.0.2)
## webshot 0.5.2 2019-11-22 [2] CRAN (R 4.0.2)
## withr 2.2.0 2020-04-20 [2] CRAN (R 4.0.2)
## xfun 0.15 2020-06-21 [2] CRAN (R 4.0.2)
## xgboost 1.1.1.1 2020-06-14 [2] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 4.0.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.0.2)
## yaml 2.2.1 2020-02-01 [2] CRAN (R 4.0.2)
## zip * 2.0.4 2019-09-01 [2] CRAN (R 4.0.2)
## zoo 1.8-8 2020-05-02 [2] CRAN (R 4.0.2)
##
## [1] /home/rushj03/R/x86_64-pc-linux-gnu-library/4.0
## [2] /usr/lib/R/site-library
## [3] /usr/lib/R/library